aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2010-08-04 15:14:38 +0200
committerJiri Kosina <jkosina@suse.cz>2010-08-04 15:14:38 +0200
commitd790d4d583aeaed9fc6f8a9f4d9f8ce6b1c15c7f (patch)
tree854ab394486288d40fa8179cbfaf66e8bdc44b0f /fs
parent73b2c7165b76b20eb1290e7efebc33cfd21db1ca (diff)
parent3a09b1be53d23df780a0cd0e4087a05e2ca4a00c (diff)
downloadvexpress-lsk-d790d4d583aeaed9fc6f8a9f4d9f8ce6b1c15c7f.tar.gz
Merge branch 'master' into for-next
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Makefile4
-rw-r--r--fs/9p/fid.c111
-rw-r--r--fs/9p/v9fs.c3
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h1
-rw-r--r--fs/9p/vfs_dir.c136
-rw-r--r--fs/9p/vfs_file.c26
-rw-r--r--fs/9p/vfs_inode.c757
-rw-r--r--fs/9p/vfs_super.c50
-rw-r--r--fs/9p/xattr.c160
-rw-r--r--fs/9p/xattr.h27
-rw-r--r--fs/9p/xattr_user.c80
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/binfmt_flat.c6
-rw-r--r--fs/btrfs/ctree.c129
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/ioctl.c20
-rw-r--r--fs/ceph/Kconfig2
-rw-r--r--fs/ceph/auth_x.c5
-rw-r--r--fs/ceph/caps.c36
-rw-r--r--fs/ceph/crush/mapper.c41
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/dir.c13
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c25
-rw-r--r--fs/ceph/mds_client.c51
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/messenger.c75
-rw-r--r--fs/ceph/mon_client.c9
-rw-r--r--fs/ceph/osd_client.c9
-rw-r--r--fs/ceph/osdmap.c28
-rw-r--r--fs/cifs/Kconfig9
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/cache.c331
-rw-r--r--fs/cifs/cifs_dfs_ref.c28
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifs_spnego.c3
-rw-r--r--fs/cifs/cifsfs.c37
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h40
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/connect.c180
-rw-r--r--fs/cifs/dir.c82
-rw-r--r--fs/cifs/dns_resolve.c71
-rw-r--r--fs/cifs/dns_resolve.h4
-rw-r--r--fs/cifs/file.c172
-rw-r--r--fs/cifs/fscache.c236
-rw-r--r--fs/cifs/fscache.h136
-rw-r--r--fs/cifs/inode.c24
-rw-r--r--fs/cifs/ioctl.c3
-rw-r--r--fs/cifs/netmisc.c24
-rw-r--r--fs/cifs/readdir.c5
-rw-r--r--fs/cifs/sess.c10
-rw-r--r--fs/cifs/smberr.h1
-rw-r--r--fs/dcache.c4
-rw-r--r--fs/direct-io.c26
-rw-r--r--fs/ecryptfs/messaging.c17
-rw-r--r--fs/ext2/acl.c1
-rw-r--r--fs/ext3/acl.c1
-rw-r--r--fs/ext4/inode.c10
-rw-r--r--fs/fcntl.c6
-rw-r--r--fs/fs-writeback.c465
-rw-r--r--fs/gfs2/aops.c9
-rw-r--r--fs/gfs2/bmap.c18
-rw-r--r--fs/gfs2/bmap.h2
-rw-r--r--fs/gfs2/dir.c44
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c97
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/gfs2/inode.c12
-rw-r--r--fs/gfs2/ops_fstype.c27
-rw-r--r--fs/gfs2/quota.c25
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/super.c9
-rw-r--r--fs/gfs2/sys.c57
-rw-r--r--fs/inode.c2
-rw-r--r--fs/jbd2/journal.c15
-rw-r--r--fs/jbd2/transaction.c9
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/mbcache.c5
-rw-r--r--fs/nfs/client.c122
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/file.c13
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/super.c22
-rw-r--r--fs/nfs/write.c30
-rw-r--r--fs/ocfs2/aops.c101
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c22
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c309
-rw-r--r--fs/ocfs2/file.h6
-rw-r--r--fs/ocfs2/journal.c30
-rw-r--r--fs/ocfs2/localalloc.c7
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/reservations.c1
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/xattr.c200
-rw-r--r--fs/partitions/ibm.c14
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/task_nommu.c20
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/splice.c9
-rw-r--r--fs/super.c6
-rw-r--r--fs/sysfs/symlink.c26
-rw-r--r--fs/sysv/ialloc.c6
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/lpt.c14
-rw-r--r--fs/ubifs/lpt_commit.c2
-rw-r--r--fs/ubifs/recovery.c23
-rw-r--r--fs/ubifs/shrinker.c2
-rw-r--r--fs/ubifs/super.c4
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/xfs/Makefile4
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c611
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c67
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h119
-rw-r--r--fs/xfs/linux-2.6/xfs_dmapi_priv.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c104
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h25
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c34
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c173
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c179
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h5
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h131
-rw-r--r--fs/xfs/quota/xfs_dquot.c114
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c301
-rw-r--r--fs/xfs/quota/xfs_qm.c40
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c10
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c10
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c112
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c35
-rw-r--r--fs/xfs/support/debug.c1
-rw-r--r--fs/xfs/xfs_alloc.c15
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_alloc_btree.c5
-rw-r--r--fs/xfs/xfs_attr.c91
-rw-r--r--fs/xfs/xfs_attr_leaf.c5
-rw-r--r--fs/xfs/xfs_bmap.c327
-rw-r--r--fs/xfs/xfs_bmap.h37
-rw-r--r--fs/xfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/xfs_btree.c5
-rw-r--r--fs/xfs/xfs_buf_item.c228
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_da_btree.c20
-rw-r--r--fs/xfs/xfs_dfrag.c21
-rw-r--r--fs/xfs/xfs_dir2.c11
-rw-r--r--fs/xfs/xfs_dir2_block.c8
-rw-r--r--fs/xfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/xfs_dir2_leaf.c4
-rw-r--r--fs/xfs/xfs_dir2_node.c2
-rw-r--r--fs/xfs/xfs_dir2_sf.c2
-rw-r--r--fs/xfs/xfs_dmapi.h170
-rw-r--r--fs/xfs/xfs_dmops.c55
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c278
-rw-r--r--fs/xfs/xfs_filestream.c84
-rw-r--r--fs/xfs/xfs_filestream.h82
-rw-r--r--fs/xfs/xfs_fsops.c7
-rw-r--r--fs/xfs/xfs_ialloc.c146
-rw-r--r--fs/xfs/xfs_ialloc_btree.c4
-rw-r--r--fs/xfs/xfs_iget.c118
-rw-r--r--fs/xfs/xfs_inode.c65
-rw-r--r--fs/xfs/xfs_inode.h10
-rw-r--r--fs/xfs/xfs_inode_item.c273
-rw-r--r--fs/xfs/xfs_inode_item.h12
-rw-r--r--fs/xfs/xfs_iomap.c76
-rw-r--r--fs/xfs/xfs_iomap.h22
-rw-r--r--fs/xfs/xfs_itable.c293
-rw-r--r--fs/xfs/xfs_itable.h17
-rw-r--r--fs/xfs/xfs_log.c16
-rw-r--r--fs/xfs/xfs_log.h11
-rw-r--r--fs/xfs/xfs_log_cil.c4
-rw-r--r--fs/xfs/xfs_log_recover.c44
-rw-r--r--fs/xfs/xfs_mount.c5
-rw-r--r--fs/xfs/xfs_mount.h71
-rw-r--r--fs/xfs/xfs_rename.c63
-rw-r--r--fs/xfs/xfs_rtalloc.c13
-rw-r--r--fs/xfs/xfs_rw.c15
-rw-r--r--fs/xfs/xfs_trans.c211
-rw-r--r--fs/xfs/xfs_trans.h117
-rw-r--r--fs/xfs/xfs_trans_ail.c1
-rw-r--r--fs/xfs/xfs_trans_buf.c75
-rw-r--r--fs/xfs/xfs_trans_extfree.c23
-rw-r--r--fs/xfs/xfs_trans_inode.c76
-rw-r--r--fs/xfs/xfs_trans_item.c441
-rw-r--r--fs/xfs/xfs_trans_priv.h18
-rw-r--r--fs/xfs/xfs_utils.c87
-rw-r--r--fs/xfs/xfs_utils.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c295
206 files changed, 5706 insertions, 5706 deletions
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 1a940ec7af6..91fba025fcb 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_9P_FS) := 9p.o
vfs_dir.o \
vfs_dentry.o \
v9fs.o \
- fid.o
+ fid.o \
+ xattr.o \
+ xattr_user.o
9p-$(CONFIG_9P_FSCACHE) += cache.o
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 7317b39b281..35856368906 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -97,6 +97,34 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
return ret;
}
+/*
+ * We need to hold v9ses->rename_sem as long as we hold references
+ * to returned path array. Array element contain pointers to
+ * dentry names.
+ */
+static int build_path_from_dentry(struct v9fs_session_info *v9ses,
+ struct dentry *dentry, char ***names)
+{
+ int n = 0, i;
+ char **wnames;
+ struct dentry *ds;
+
+ for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent)
+ n++;
+
+ wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL);
+ if (!wnames)
+ goto err_out;
+
+ for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent)
+ wnames[i] = (char *)ds->d_name.name;
+
+ *names = wnames;
+ return n;
+err_out:
+ return -ENOMEM;
+}
+
/**
* v9fs_fid_lookup - lookup for a fid, try to walk if not found
* @dentry: dentry to look for fid in
@@ -112,7 +140,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
int i, n, l, clone, any, access;
u32 uid;
struct p9_fid *fid, *old_fid = NULL;
- struct dentry *d, *ds;
+ struct dentry *ds;
struct v9fs_session_info *v9ses;
char **wnames, *uname;
@@ -139,49 +167,62 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
fid = v9fs_fid_find(dentry, uid, any);
if (fid)
return fid;
-
+ /*
+ * we don't have a matching fid. To do a TWALK we need
+ * parent fid. We need to prevent rename when we want to
+ * look at the parent.
+ */
+ down_read(&v9ses->rename_sem);
ds = dentry->d_parent;
fid = v9fs_fid_find(ds, uid, any);
- if (!fid) { /* walk from the root */
- n = 0;
- for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent)
- n++;
+ if (fid) {
+ /* Found the parent fid do a lookup with that */
+ fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1);
+ goto fid_out;
+ }
+ up_read(&v9ses->rename_sem);
- fid = v9fs_fid_find(ds, uid, any);
- if (!fid) { /* the user is not attached to the fs yet */
- if (access == V9FS_ACCESS_SINGLE)
- return ERR_PTR(-EPERM);
+ /* start from the root and try to do a lookup */
+ fid = v9fs_fid_find(dentry->d_sb->s_root, uid, any);
+ if (!fid) {
+ /* the user is not attached to the fs yet */
+ if (access == V9FS_ACCESS_SINGLE)
+ return ERR_PTR(-EPERM);
- if (v9fs_proto_dotu(v9ses))
+ if (v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses))
uname = NULL;
- else
- uname = v9ses->uname;
+ else
+ uname = v9ses->uname;
- fid = p9_client_attach(v9ses->clnt, NULL, uname, uid,
- v9ses->aname);
-
- if (IS_ERR(fid))
- return fid;
-
- v9fs_fid_add(ds, fid);
- }
- } else /* walk from the parent */
- n = 1;
+ fid = p9_client_attach(v9ses->clnt, NULL, uname, uid,
+ v9ses->aname);
+ if (IS_ERR(fid))
+ return fid;
- if (ds == dentry)
+ v9fs_fid_add(dentry->d_sb->s_root, fid);
+ }
+ /* If we are root ourself just return that */
+ if (dentry->d_sb->s_root == dentry)
return fid;
-
- wnames = kmalloc(sizeof(char *) * n, GFP_KERNEL);
- if (!wnames)
- return ERR_PTR(-ENOMEM);
-
- for (d = dentry, i = (n-1); i >= 0; i--, d = d->d_parent)
- wnames[i] = (char *) d->d_name.name;
-
+ /*
+ * Do a multipath walk with attached root.
+ * When walking parent we need to make sure we
+ * don't have a parallel rename happening
+ */
+ down_read(&v9ses->rename_sem);
+ n = build_path_from_dentry(v9ses, dentry, &wnames);
+ if (n < 0) {
+ fid = ERR_PTR(n);
+ goto err_out;
+ }
clone = 1;
i = 0;
while (i < n) {
l = min(n - i, P9_MAXWELEM);
+ /*
+ * We need to hold rename lock when doing a multipath
+ * walk to ensure none of the patch component change
+ */
fid = p9_client_walk(fid, l, &wnames[i], clone);
if (IS_ERR(fid)) {
if (old_fid) {
@@ -193,15 +234,17 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
p9_client_clunk(old_fid);
}
kfree(wnames);
- return fid;
+ goto err_out;
}
old_fid = fid;
i += l;
clone = 0;
}
-
kfree(wnames);
+fid_out:
v9fs_fid_add(dentry, fid);
+err_out:
+ up_read(&v9ses->rename_sem);
return fid;
}
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index f8b86e92cd6..38dc0e06759 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -237,6 +237,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
__putname(v9ses->uname);
return ERR_PTR(-ENOMEM);
}
+ init_rwsem(&v9ses->rename_sem);
rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
if (rc) {
@@ -278,7 +279,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
/* for legacy mode, fall back to V9FS_ACCESS_ANY */
- if (!v9fs_proto_dotu(v9ses) &&
+ if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
v9ses->flags &= ~V9FS_ACCESS_MASK;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index bec4d0bcb45..4c963c9fc41 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -104,6 +104,7 @@ struct v9fs_session_info {
struct p9_client *clnt; /* 9p client */
struct list_head slist; /* list of sessions registered with v9fs */
struct backing_dev_info bdi;
+ struct rw_semaphore rename_sem;
};
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 32ef4009d03..f47c6bbb01b 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -55,6 +55,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode);
void v9fs_clear_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
+void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
int v9fs_dir_release(struct inode *inode, struct file *filp);
int v9fs_file_open(struct inode *inode, struct file *file);
void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index d61e3b28ce3..16c8a2a98c1 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -87,29 +87,19 @@ static void p9stat_init(struct p9_wstat *stbuf)
}
/**
- * v9fs_dir_readdir - read a directory
+ * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir
* @filp: opened file structure
- * @dirent: directory structure ???
- * @filldir: function to populate directory structure ???
+ * @buflen: Length in bytes of buffer to allocate
*
*/
-static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int v9fs_alloc_rdir_buf(struct file *filp, int buflen)
{
- int over;
- struct p9_wstat st;
- int err = 0;
- struct p9_fid *fid;
- int buflen;
- int reclen = 0;
struct p9_rdir *rdir;
+ struct p9_fid *fid;
+ int err = 0;
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
fid = filp->private_data;
-
- buflen = fid->clnt->msize - P9_IOHDRSZ;
-
- /* allocate rdir on demand */
if (!fid->rdir) {
rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
@@ -128,6 +118,36 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
spin_unlock(&filp->f_dentry->d_lock);
kfree(rdir);
}
+exit:
+ return err;
+}
+
+/**
+ * v9fs_dir_readdir - read a directory
+ * @filp: opened file structure
+ * @dirent: directory structure ???
+ * @filldir: function to populate directory structure ???
+ *
+ */
+
+static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ int over;
+ struct p9_wstat st;
+ int err = 0;
+ struct p9_fid *fid;
+ int buflen;
+ int reclen = 0;
+ struct p9_rdir *rdir;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+ fid = filp->private_data;
+
+ buflen = fid->clnt->msize - P9_IOHDRSZ;
+
+ err = v9fs_alloc_rdir_buf(filp, buflen);
+ if (err)
+ goto exit;
rdir = (struct p9_rdir *) fid->rdir;
err = mutex_lock_interruptible(&rdir->mutex);
@@ -146,7 +166,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
while (rdir->head < rdir->tail) {
p9stat_init(&st);
err = p9stat_read(rdir->buf + rdir->head,
- buflen - rdir->head, &st,
+ rdir->tail - rdir->head, &st,
fid->clnt->proto_version);
if (err) {
P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
@@ -176,6 +196,88 @@ exit:
return err;
}
+/**
+ * v9fs_dir_readdir_dotl - read a directory
+ * @filp: opened file structure
+ * @dirent: buffer to fill dirent structures
+ * @filldir: function to populate dirent structures
+ *
+ */
+static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
+ filldir_t filldir)
+{
+ int over;
+ int err = 0;
+ struct p9_fid *fid;
+ int buflen;
+ struct p9_rdir *rdir;
+ struct p9_dirent curdirent;
+ u64 oldoffset = 0;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+ fid = filp->private_data;
+
+ buflen = fid->clnt->msize - P9_READDIRHDRSZ;
+
+ err = v9fs_alloc_rdir_buf(filp, buflen);
+ if (err)
+ goto exit;
+ rdir = (struct p9_rdir *) fid->rdir;
+
+ err = mutex_lock_interruptible(&rdir->mutex);
+ if (err)
+ return err;
+
+ while (err == 0) {
+ if (rdir->tail == rdir->head) {
+ err = p9_client_readdir(fid, rdir->buf, buflen,
+ filp->f_pos);
+ if (err <= 0)
+ goto unlock_and_exit;
+
+ rdir->head = 0;
+ rdir->tail = err;
+ }
+
+ while (rdir->head < rdir->tail) {
+
+ err = p9dirent_read(rdir->buf + rdir->head,
+ buflen - rdir->head, &curdirent,
+ fid->clnt->proto_version);
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
+ err = -EIO;
+ goto unlock_and_exit;
+ }
+
+ /* d_off in dirent structure tracks the offset into
+ * the next dirent in the dir. However, filldir()
+ * expects offset into the current dirent. Hence
+ * while calling filldir send the offset from the
+ * previous dirent structure.
+ */
+ over = filldir(dirent, curdirent.d_name,
+ strlen(curdirent.d_name),
+ oldoffset, v9fs_qid2ino(&curdirent.qid),
+ curdirent.d_type);
+ oldoffset = curdirent.d_off;
+
+ if (over) {
+ err = 0;
+ goto unlock_and_exit;
+ }
+
+ filp->f_pos = curdirent.d_off;
+ rdir->head += err;
+ }
+ }
+
+unlock_and_exit:
+ mutex_unlock(&rdir->mutex);
+exit:
+ return err;
+}
+
/**
* v9fs_dir_release - close a directory
@@ -207,7 +309,7 @@ const struct file_operations v9fs_dir_operations = {
const struct file_operations v9fs_dir_operations_dotl = {
.read = generic_read_dir,
.llseek = generic_file_llseek,
- .readdir = v9fs_dir_readdir,
+ .readdir = v9fs_dir_readdir_dotl,
.open = v9fs_file_open,
.release = v9fs_dir_release,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2bedc6c94fc..e97c92bd6f1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -59,9 +59,13 @@ int v9fs_file_open(struct inode *inode, struct file *file)
struct p9_fid *fid;
int omode;
- P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
v9ses = v9fs_inode2v9ses(inode);
- omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses));
+ if (v9fs_proto_dotl(v9ses))
+ omode = file->f_flags;
+ else
+ omode = v9fs_uflags2omode(file->f_flags,
+ v9fs_proto_dotu(v9ses));
fid = file->private_data;
if (!fid) {
fid = v9fs_fid_clone(file->f_path.dentry);
@@ -73,11 +77,12 @@ int v9fs_file_open(struct inode *inode, struct file *file)
p9_client_clunk(fid);
return err;
}
- if (omode & P9_OTRUNC) {
+ if (file->f_flags & O_TRUNC) {
i_size_write(inode, 0);
inode->i_blocks = 0;
}
- if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses)))
+ if ((file->f_flags & O_APPEND) &&
+ (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
generic_file_llseek(file, 0, SEEK_END);
}
@@ -139,7 +144,7 @@ ssize_t
v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
u64 offset)
{
- int n, total;
+ int n, total, size;
struct p9_fid *fid = filp->private_data;
P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
@@ -147,6 +152,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
n = 0;
total = 0;
+ size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
do {
n = p9_client_read(fid, data, udata, offset, count);
if (n <= 0)
@@ -160,7 +166,7 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
offset += n;
count -= n;
total += n;
- } while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ));
+ } while (count > 0 && n == size);
if (n < 0)
total = n;
@@ -183,11 +189,13 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
{
int ret;
struct p9_fid *fid;
+ size_t size;
P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
fid = filp->private_data;
- if (count > (fid->clnt->msize - P9_IOHDRSZ))
+ size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
+ if (count > size)
ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
else
ret = p9_client_read(fid, NULL, udata, *offset, count);
@@ -224,9 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
fid = filp->private_data;
clnt = fid->clnt;
- rsize = fid->iounit;
- if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
- rsize = clnt->msize - P9_IOHDRSZ;
+ rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ;
do {
if (count < rsize)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4331b3b5ee1..6e94f3247ce 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -35,6 +35,7 @@
#include <linux/idr.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/xattr.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -42,6 +43,7 @@
#include "v9fs_vfs.h"
#include "fid.h"
#include "cache.h"
+#include "xattr.h"
static const struct inode_operations v9fs_dir_inode_operations;
static const struct inode_operations v9fs_dir_inode_operations_dotu;
@@ -236,6 +238,41 @@ void v9fs_destroy_inode(struct inode *inode)
#endif
/**
+ * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
+ * new file system object. This checks the S_ISGID to determine the owning
+ * group of the new file system object.
+ */
+
+static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
+{
+ BUG_ON(dir_inode == NULL);
+
+ if (dir_inode->i_mode & S_ISGID) {
+ /* set_gid bit is set.*/
+ return dir_inode->i_gid;
+ }
+ return current_fsgid();
+}
+
+/**
+ * v9fs_dentry_from_dir_inode - helper function to get the dentry from
+ * dir inode.
+ *
+ */
+
+static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
+{
+ struct dentry *dentry;
+
+ spin_lock(&dcache_lock);
+ /* Directory should have only one entry. */
+ BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
+ dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+ spin_unlock(&dcache_lock);
+ return dentry;
+}
+
+/**
* v9fs_get_inode - helper function to setup an inode
* @sb: superblock
* @mode: mode to setup inode with
@@ -267,7 +304,13 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
case S_IFBLK:
case S_IFCHR:
case S_IFSOCK:
- if (!v9fs_proto_dotu(v9ses)) {
+ if (v9fs_proto_dotl(v9ses)) {
+ inode->i_op = &v9fs_file_inode_operations_dotl;
+ inode->i_fop = &v9fs_file_operations_dotl;
+ } else if (v9fs_proto_dotu(v9ses)) {
+ inode->i_op = &v9fs_file_inode_operations;
+ inode->i_fop = &v9fs_file_operations;
+ } else {
P9_DPRINTK(P9_DEBUG_ERROR,
"special files without extended mode\n");
err = -EINVAL;
@@ -396,23 +439,14 @@ void v9fs_clear_inode(struct inode *inode)
#endif
}
-/**
- * v9fs_inode_from_fid - populate an inode by issuing a attribute request
- * @v9ses: session information
- * @fid: fid to issue attribute request for
- * @sb: superblock on which to create inode
- *
- */
-
static struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
struct super_block *sb)
{
int err, umode;
- struct inode *ret;
+ struct inode *ret = NULL;
struct p9_wstat *st;
- ret = NULL;
st = p9_client_stat(fid);
if (IS_ERR(st))
return ERR_CAST(st);
@@ -433,15 +467,62 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
#endif
p9stat_free(st);
kfree(st);
-
return ret;
-
error:
p9stat_free(st);
kfree(st);
return ERR_PTR(err);
}
+static struct inode *
+v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ struct inode *ret = NULL;
+ int err;
+ struct p9_stat_dotl *st;
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return ERR_CAST(st);
+
+ ret = v9fs_get_inode(sb, st->st_mode);
+ if (IS_ERR(ret)) {
+ err = PTR_ERR(ret);
+ goto error;
+ }
+
+ v9fs_stat2inode_dotl(st, ret);
+ ret->i_ino = v9fs_qid2ino(&st->qid);
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_vcookie_set_qid(ret, &st->qid);
+ v9fs_cache_inode_get_cookie(ret);
+#endif
+ kfree(st);
+ return ret;
+error:
+ kfree(st);
+ return ERR_PTR(err);
+}
+
+/**
+ * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ if (v9fs_proto_dotl(v9ses))
+ return v9fs_inode_dotl(v9ses, fid, sb);
+ else
+ return v9fs_inode(v9ses, fid, sb);
+}
+
/**
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
@@ -563,6 +644,118 @@ error:
}
/**
+ * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
+ * @dir: directory inode that is being created
+ * @dentry: dentry that is being deleted
+ * @mode: create permissions
+ * @nd: path information
+ *
+ */
+
+static int
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ int err = 0;
+ char *name = NULL;
+ gid_t gid;
+ int flags;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL;
+ struct p9_fid *dfid, *ofid;
+ struct file *filp;
+ struct p9_qid qid;
+ struct inode *inode;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ if (nd && nd->flags & LOOKUP_OPEN)
+ flags = nd->intent.open.flags - 1;
+ else
+ flags = O_RDWR;
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
+ "mode:0x%x\n", name, flags, mode);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ /* clone a fid to use for creation */
+ ofid = p9_client_walk(dfid, 0, NULL, 1);
+ if (IS_ERR(ofid)) {
+ err = PTR_ERR(ofid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_open_dotl failed in creat %d\n",
+ err);
+ goto error;
+ }
+
+ /* No need to populate the inode if we are not opening the file AND
+ * not in cached mode.
+ */
+ if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) {
+ /* Not in cached mode. No need to populate inode with stat */
+ dentry->d_op = &v9fs_dentry_operations;
+ p9_client_clunk(ofid);
+ d_instantiate(dentry, NULL);
+ return 0;
+ }
+
+ /* Now walk from the parent so we can get an unopened fid. */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ fid = NULL;
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to dentry */
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+
+ /* if we are opening a file, assign the open fid to the file */
+ if (nd && nd->flags & LOOKUP_OPEN) {
+ filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+ if (IS_ERR(filp)) {
+ p9_client_clunk(ofid);
+ return PTR_ERR(filp);
+ }
+ filp->private_data = ofid;
+ } else
+ p9_client_clunk(ofid);
+
+ return 0;
+
+error:
+ if (ofid)
+ p9_client_clunk(ofid);
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
+/**
* v9fs_vfs_create - VFS hook to create files
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
@@ -652,6 +845,83 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return err;
}
+
+/**
+ * v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
+ * @dir: inode that is being unlinked
+ * @dentry: dentry that is being unlinked
+ * @mode: mode for new directory
+ *
+ */
+
+static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
+ int mode)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ gid_t gid;
+ char *name;
+ struct inode *inode;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+ err = 0;
+ v9ses = v9fs_inode2v9ses(dir);
+
+ mode |= S_IFDIR;
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+ goto error;
+ }
+
+ name = (char *) dentry->d_name.name;
+ err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ }
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
/**
* v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
* @dir: inode that is being walked from
@@ -678,6 +948,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
sb = dir->i_sb;
v9ses = v9fs_inode2v9ses(dir);
+ /* We can walk d_parent because we hold the dir->i_mutex */
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid))
return ERR_CAST(dfid);
@@ -785,27 +1056,33 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto clunk_olddir;
}
+ down_write(&v9ses->rename_sem);
if (v9fs_proto_dotl(v9ses)) {
retval = p9_client_rename(oldfid, newdirfid,
(char *) new_dentry->d_name.name);
if (retval != -ENOSYS)
goto clunk_newdir;
}
+ if (old_dentry->d_parent != new_dentry->d_parent) {
+ /*
+ * 9P .u can only handle file rename in the same directory
+ */
- /* 9P can only handle file rename in the same directory */
- if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
P9_DPRINTK(P9_DEBUG_ERROR,
"old dir and new dir are different\n");
retval = -EXDEV;
goto clunk_newdir;
}
-
v9fs_blank_wstat(&wstat);
wstat.muid = v9ses->uname;
wstat.name = (char *) new_dentry->d_name.name;
retval = p9_client_wstat(oldfid, &wstat);
clunk_newdir:
+ if (!retval)
+ /* successful rename */
+ d_move(old_dentry, new_dentry);
+ up_write(&v9ses->rename_sem);
p9_client_clunk(newdirfid);
clunk_olddir:
@@ -853,6 +1130,42 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
+static int
+v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ int err;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+ err = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+ return simple_getattr(mnt, dentry, stat);
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ /* Ask for all the fields in stat structure. Server will return
+ * whatever it supports
+ */
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, dentry->d_inode);
+ generic_fillattr(dentry->d_inode, stat);
+ /* Change block size to what the server returned */
+ stat->blksize = st->st_blksize;
+
+ kfree(st);
+ return 0;
+}
+
/**
* v9fs_vfs_setattr - set file metadata
* @dentry: file whose metadata to set
@@ -903,6 +1216,49 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
}
/**
+ * v9fs_vfs_setattr_dotl - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ *
+ */
+
+static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+{
+ int retval;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
+ struct p9_iattr_dotl p9attr;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "\n");
+
+ retval = inode_change_ok(dentry->d_inode, iattr);
+ if (retval)
+ return retval;
+
+ p9attr.valid = iattr->ia_valid;
+ p9attr.mode = iattr->ia_mode;
+ p9attr.uid = iattr->ia_uid;
+ p9attr.gid = iattr->ia_gid;
+ p9attr.size = iattr->ia_size;
+ p9attr.atime_sec = iattr->ia_atime.tv_sec;
+ p9attr.atime_nsec = iattr->ia_atime.tv_nsec;
+ p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
+ p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
+
+ retval = -EPERM;
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ retval = p9_client_setattr(fid, &p9attr);
+ if (retval >= 0)
+ retval = inode_setattr(dentry->d_inode, iattr);
+
+ return retval;
+}
+
+/**
* v9fs_stat2inode - populate an inode structure with mistat info
* @stat: Plan 9 metadata (mistat) structure
* @inode: inode to populate
@@ -980,6 +1336,77 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
}
/**
+ * v9fs_stat2inode_dotl - populate an inode structure with stat info
+ * @stat: stat structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ */
+
+void
+v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
+{
+
+ if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ inode->i_uid = stat->st_uid;
+ inode->i_gid = stat->st_gid;
+ inode->i_nlink = stat->st_nlink;
+ inode->i_mode = stat->st_mode;
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+
+ if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+
+ i_size_write(inode, stat->st_size);
+ inode->i_blocks = stat->st_blocks;
+ } else {
+ if (stat->st_result_mask & P9_STATS_ATIME) {
+ inode->i_atime.tv_sec = stat->st_atime_sec;
+ inode->i_atime.tv_nsec = stat->st_atime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_MTIME) {
+ inode->i_mtime.tv_sec = stat->st_mtime_sec;
+ inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_CTIME) {
+ inode->i_ctime.tv_sec = stat->st_ctime_sec;
+ inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ }
+ if (stat->st_result_mask & P9_STATS_UID)
+ inode->i_uid = stat->st_uid;
+ if (stat->st_result_mask & P9_STATS_GID)
+ inode->i_gid = stat->st_gid;
+ if (stat->st_result_mask & P9_STATS_NLINK)
+ inode->i_nlink = stat->st_nlink;
+ if (stat->st_result_mask & P9_STATS_MODE) {
+ inode->i_mode = stat->st_mode;
+ if ((S_ISBLK(inode->i_mode)) ||
+ (S_ISCHR(inode->i_mode)))
+ init_special_inode(inode, inode->i_mode,
+ inode->i_rdev);
+ }
+ if (stat->st_result_mask & P9_STATS_RDEV)
+ inode->i_rdev = new_decode_dev(stat->st_rdev);
+ if (stat->st_result_mask & P9_STATS_SIZE)
+ i_size_write(inode, stat->st_size);
+ if (stat->st_result_mask & P9_STATS_BLOCKS)
+ inode->i_blocks = stat->st_blocks;
+ }
+ if (stat->st_result_mask & P9_STATS_GEN)
+ inode->i_generation = stat->st_gen;
+
+ /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
+ * because the inode structure does not have fields for them.
+ */
+}
+
+/**
* v9fs_qid2ino - convert qid into inode number
* @qid: qid to hash
*
@@ -1022,7 +1449,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
if (IS_ERR(fid))
return PTR_ERR(fid);
- if (!v9fs_proto_dotu(v9ses))
+ if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))
return -EBADF;
st = p9_client_stat(fid);
@@ -1128,6 +1555,99 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
}
/**
+ * v9fs_vfs_symlink_dotl - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * See Also: 9P2000.L RFC for more information
+ *
+ */
+
+static int
+v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *dfid;
+ struct p9_fid *fid = NULL;
+ struct inode *inode;
+ struct p9_qid qid;
+ char *name;
+ int err;
+ gid_t gid;
+
+ name = (char *) dentry->d_name.name;
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
+ dir->i_ino, name, symname);
+ v9ses = v9fs_inode2v9ses(dir);
+
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ return err;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
+ goto error;
+ }
+
+ /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
+ err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
+ goto error;
+ }
+
+ if (v9ses->cache) {
+ /* Now walk from the parent so we can get an unopened fid. */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ /* instantiate inode and assign the unopened fid to dentry */
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /* Not in cached mode. No need to populate inode with stat */
+ inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ dentry->d_op = &v9fs_dentry_operations;
+ d_instantiate(dentry, inode);
+ }
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+
+ return err;
+}
+
+/**
* v9fs_vfs_symlink - helper function to create symlinks
* @dir: directory inode containing symlink
* @dentry: dentry for symlink
@@ -1186,6 +1706,76 @@ clunk_fid:
}
/**
+ * v9fs_vfs_link_dotl - create a hardlink for dotl
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ *
+ */
+
+static int
+v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ int err;
+ struct p9_fid *dfid, *oldfid;
+ char *name;
+ struct v9fs_session_info *v9ses;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
+ dir->i_ino, old_dentry->d_name.name,
+ dentry->d_name.name);
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid))
+ return PTR_ERR(dfid);
+
+ oldfid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(oldfid))
+ return PTR_ERR(oldfid);
+
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
+
+ if (err < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
+ return err;
+ }
+
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ /* Get the latest stat info from server. */
+ struct p9_fid *fid;
+ struct p9_stat_dotl *st;
+
+ fid = v9fs_fid_lookup(old_dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ v9fs_stat2inode_dotl(st, old_dentry->d_inode);
+
+ kfree(st);
+ } else {
+ /* Caching disabled. No need to get upto date stat info.
+ * This dentry will be released immediately. So, just i_count++
+ */
+ atomic_inc(&old_dentry->d_inode->i_count);
+ }
+
+ dentry->d_op = old_dentry->d_op;
+ d_instantiate(dentry, old_dentry->d_inode);
+
+ return err;
+}
+
+/**
* v9fs_vfs_mknod - create a special file
* @dir: inode destination for new link
* @dentry: dentry for file
@@ -1230,6 +1820,100 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
+/**
+ * v9fs_vfs_mknod_dotl - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @rdev: device associated with special file
+ *
+ */
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
+ dev_t rdev)
+{
+ int err;
+ char *name;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid = NULL, *dfid = NULL;
+ struct inode *inode;
+ gid_t gid;
+ struct p9_qid qid;
+ struct dentry *dir_dentry;
+
+ P9_DPRINTK(P9_DEBUG_VFS,
+ " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+ dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+
+ if (!new_valid_dev(rdev))
+ return -EINVAL;
+
+ v9ses = v9fs_inode2v9ses(dir);
+ dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dfid = v9fs_fid_lookup(dir_dentry);
+ if (IS_ERR(dfid)) {
+ err = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ dfid = NULL;
+ goto error;
+ }
+
+ gid = v9fs_get_fsgid_for_create(dir);
+ if (gid < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+ goto error;
+ }
+
+ name = (char *) dentry->d_name.name;
+
+ err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
+ if (err < 0)
+ goto error;
+
+ /* instantiate inode and assign the unopened fid to the dentry */
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
+ fid = NULL;
+ goto error;
+ }
+
+ inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
+ goto error;
+ }
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ d_instantiate(dentry, inode);
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ fid = NULL;
+ } else {
+ /*
+ * Not in cached mode. No need to populate inode with stat.
+ * socket syscall returns a fd, so we need instantiate
+ */
+ inode = v9fs_get_inode(dir->i_sb, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto error;
+ }
+ dentry->d_op = &v9fs_dentry_operations;
+ d_instantiate(dentry, inode);
+ }
+
+error:
+ if (fid)
+ p9_client_clunk(fid);
+ return err;
+}
+
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
@@ -1238,24 +1922,29 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.unlink = v9fs_vfs_unlink,
.mkdir = v9fs_vfs_mkdir,
.rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod,
+ .mknod = v9fs_vfs_mknod_dotl,
.rename = v9fs_vfs_rename,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
static const struct inode_operations v9fs_dir_inode_operations_dotl = {
- .create = v9fs_vfs_create,
+ .create = v9fs_vfs_create_dotl,
.lookup = v9fs_vfs_lookup,
- .symlink = v9fs_vfs_symlink,
- .link = v9fs_vfs_link,
+ .link = v9fs_vfs_link_dotl,
+ .symlink = v9fs_vfs_symlink_dotl,
.unlink = v9fs_vfs_unlink,
- .mkdir = v9fs_vfs_mkdir,
+ .mkdir = v9fs_vfs_mkdir_dotl,
.rmdir = v9fs_vfs_rmdir,
- .mknod = v9fs_vfs_mknod,
+ .mknod = v9fs_vfs_mknod_dotl,
.rename = v9fs_vfs_rename,
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
+
};
static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1276,8 +1965,12 @@ static const struct inode_operations v9fs_file_inode_operations = {
};
static const struct inode_operations v9fs_file_inode_operations_dotl = {
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
};
static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -1292,6 +1985,10 @@ static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
.readlink = generic_readlink,
.follow_link = v9fs_vfs_follow_link,
.put_link = v9fs_vfs_put_link,
- .getattr = v9fs_vfs_getattr,
- .setattr = v9fs_vfs_setattr,
+ .getattr = v9fs_vfs_getattr_dotl,
+ .setattr = v9fs_vfs_setattr_dotl,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = v9fs_listxattr,
};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index be74d020436..4b9ede0b41b 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -45,6 +45,7 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "fid.h"
+#include "xattr.h"
static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
@@ -77,9 +78,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
sb->s_blocksize = 1 << sb->s_blocksize_bits;
sb->s_magic = V9FS_MAGIC;
- if (v9fs_proto_dotl(v9ses))
+ if (v9fs_proto_dotl(v9ses)) {
sb->s_op = &v9fs_super_ops_dotl;
- else
+ sb->s_xattr = v9fs_xattr_handlers;
+ } else
sb->s_op = &v9fs_super_ops;
sb->s_bdi = &v9ses->bdi;
@@ -107,7 +109,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
struct inode *inode = NULL;
struct dentry *root = NULL;
struct v9fs_session_info *v9ses = NULL;
- struct p9_wstat *st = NULL;
int mode = S_IRWXUGO | S_ISVTX;
struct p9_fid *fid;
int retval = 0;
@@ -124,16 +125,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
goto close_session;
}
- st = p9_client_stat(fid);
- if (IS_ERR(st)) {
- retval = PTR_ERR(st);
- goto clunk_fid;
- }
-
sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
if (IS_ERR(sb)) {
retval = PTR_ERR(sb);
- goto free_stat;
+ goto clunk_fid;
}
v9fs_fill_super(sb, v9ses, flags, data);
@@ -151,22 +146,38 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
}
sb->s_root = root;
- root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
- v9fs_stat2inode(st, root->d_inode, sb);
+ if (v9fs_proto_dotl(v9ses)) {
+ struct p9_stat_dotl *st = NULL;
+ st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+ if (IS_ERR(st)) {
+ retval = PTR_ERR(st);
+ goto clunk_fid;
+ }
+
+ v9fs_stat2inode_dotl(st, root->d_inode);
+ kfree(st);
+ } else {
+ struct p9_wstat *st = NULL;
+ st = p9_client_stat(fid);
+ if (IS_ERR(st)) {
+ retval = PTR_ERR(st);
+ goto clunk_fid;
+ }
+
+ root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+ v9fs_stat2inode(st, root->d_inode, sb);
+
+ p9stat_free(st);
+ kfree(st);
+ }
v9fs_fid_add(root, fid);
- p9stat_free(st);
- kfree(st);
P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
simple_set_mnt(mnt, sb);
return 0;
-free_stat:
- p9stat_free(st);
- kfree(st);
-
clunk_fid:
p9_client_clunk(fid);
@@ -176,8 +187,6 @@ close_session:
return retval;
release_sb:
- p9stat_free(st);
- kfree(st);
deactivate_locked_super(sb);
return retval;
}
@@ -278,4 +287,5 @@ struct file_system_type v9fs_fs_type = {
.get_sb = v9fs_get_sb,
.kill_sb = v9fs_kill_super,
.owner = THIS_MODULE,
+ .fs_flags = FS_RENAME_DOES_D_MOVE,
};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
new file mode 100644
index 00000000000..f88e5c2dc87
--- /dev/null
+++ b/fs/9p/xattr.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+
+#include "fid.h"
+#include "xattr.h"
+
+/*
+ * v9fs_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ ssize_t retval;
+ int msize, read_count;
+ u64 offset = 0, attr_size;
+ struct p9_fid *fid, *attr_fid;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
+ __func__, name, buffer_size);
+
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
+ if (IS_ERR(attr_fid)) {
+ retval = PTR_ERR(attr_fid);
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_attrwalk failed %zd\n", retval);
+ attr_fid = NULL;
+ goto error;
+ }
+ if (!buffer_size) {
+ /* request to get the attr_size */
+ retval = attr_size;
+ goto error;
+ }
+ if (attr_size > buffer_size) {
+ retval = -ERANGE;
+ goto error;
+ }
+ msize = attr_fid->clnt->msize;
+ while (attr_size) {
+ if (attr_size > (msize - P9_IOHDRSZ))
+ read_count = msize - P9_IOHDRSZ;
+ else
+ read_count = attr_size;
+ read_count = p9_client_read(attr_fid, ((char *)buffer)+offset,
+ NULL, offset, read_count);
+ if (read_count < 0) {
+ /* error in xattr read */
+ retval = read_count;
+ goto error;
+ }
+ offset += read_count;
+ attr_size -= read_count;
+ }
+ /* Total read xattr bytes */
+ retval = offset;
+error:
+ if (attr_fid)
+ p9_client_clunk(attr_fid);
+ return retval;
+
+}
+
+/*
+ * v9fs_xattr_set()
+ *
+ * Create, replace or remove an extended attribute for this inode. Buffer
+ * is NULL to remove an existing extended attribute, and non-NULL to
+ * either replace an existing extended attribute, or create a new extended
+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
+ * specify that an extended attribute must exist and must not exist
+ * previous to the call, respectively.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int v9fs_xattr_set(struct dentry *dentry, const char *name,
+ const void *value, size_t value_len, int flags)
+{
+ u64 offset = 0;
+ int retval, msize, write_count;
+ struct p9_fid *fid = NULL;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n",
+ __func__, name, value_len, flags);
+
+ fid = v9fs_fid_clone(dentry);
+ if (IS_ERR(fid)) {
+ retval = PTR_ERR(fid);
+ fid = NULL;
+ goto error;
+ }
+ /*
+ * On success fid points to xattr
+ */
+ retval = p9_client_xattrcreate(fid, name, value_len, flags);
+ if (retval < 0) {
+ P9_DPRINTK(P9_DEBUG_VFS,
+ "p9_client_xattrcreate failed %d\n", retval);
+ goto error;
+ }
+ msize = fid->clnt->msize;;
+ while (value_len) {
+ if (value_len > (msize - P9_IOHDRSZ))
+ write_count = msize - P9_IOHDRSZ;
+ else
+ write_count = value_len;
+ write_count = p9_client_write(fid, ((char *)value)+offset,
+ NULL, offset, write_count);
+ if (write_count < 0) {
+ /* error in xattr write */
+ retval = write_count;
+ goto error;
+ }
+ offset += write_count;
+ value_len -= write_count;
+ }
+ /* Total read xattr bytes */
+ retval = offset;
+error:
+ if (fid)
+ retval = p9_client_clunk(fid);
+ return retval;
+}
+
+ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+ return v9fs_xattr_get(dentry, NULL, buffer, buffer_size);
+}
+
+const struct xattr_handler *v9fs_xattr_handlers[] = {
+ &v9fs_xattr_user_handler,
+ NULL
+};
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
new file mode 100644
index 00000000000..9ddf672ae5c
--- /dev/null
+++ b/fs/9p/xattr.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+#ifndef FS_9P_XATTR_H
+#define FS_9P_XATTR_H
+
+#include <linux/xattr.h>
+
+extern const struct xattr_handler *v9fs_xattr_handlers[];
+extern struct xattr_handler v9fs_xattr_user_handler;
+
+extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
+ void *, size_t);
+extern int v9fs_xattr_set(struct dentry *, const char *,
+ const void *, size_t, int);
+extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t);
+#endif /* FS_9P_XATTR_H */
diff --git a/fs/9p/xattr_user.c b/fs/9p/xattr_user.c
new file mode 100644
index 00000000000..d0b701b7208
--- /dev/null
+++ b/fs/9p/xattr_user.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include "xattr.h"
+
+static int v9fs_xattr_user_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ int retval;
+ char *full_name;
+ size_t name_len;
+ size_t prefix_len = XATTR_USER_PREFIX_LEN;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ name_len = strlen(name);
+ full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+ if (!full_name)
+ return -ENOMEM;
+ memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
+ memcpy(full_name+prefix_len, name, name_len);
+ full_name[prefix_len + name_len] = '\0';
+
+ retval = v9fs_xattr_get(dentry, full_name, buffer, size);
+ kfree(full_name);
+ return retval;
+}
+
+static int v9fs_xattr_user_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
+{
+ int retval;
+ char *full_name;
+ size_t name_len;
+ size_t prefix_len = XATTR_USER_PREFIX_LEN;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ name_len = strlen(name);
+ full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
+ if (!full_name)
+ return -ENOMEM;
+ memcpy(full_name, XATTR_USER_PREFIX, prefix_len);
+ memcpy(full_name + prefix_len, name, name_len);
+ full_name[prefix_len + name_len] = '\0';
+
+ retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
+ kfree(full_name);
+ return retval;
+}
+
+struct xattr_handler v9fs_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = v9fs_xattr_user_get,
+ .set = v9fs_xattr_user_set,
+};
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d..722743b152d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_cyclic = 1,
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b6ab27ccf21..811384bec8d 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -68,11 +68,7 @@
* Here we can be a bit looser than the data sections since this
* needs to only meet arch ABI requirements.
*/
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_STACK_ALIGN (ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_STACK_ALIGN (sizeof(void *))
-#endif
+#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe..c3df14ce2cc 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
return ret;
}
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right. We'll push up to and including min_slot, but no lower
+ */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
- int free_space, u32 left_nritems)
+ int free_space, u32 left_nritems,
+ u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (empty)
nr = 0;
else
- nr = 1;
+ nr = max_t(u32, 1, min_slot);
if (path->slots[0] >= left_nritems)
push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
*
* returns 1 if the push failed because the other node didn't have enough
* room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf. It won't
+ * push any slot lower than min_slot
*/
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path,
+ int min_data_size, int data_size,
+ int empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- return __push_leaf_right(trans, root, path, data_size, empty,
- right, free_space, left_nritems);
+ return __push_leaf_right(trans, root, path, min_data_size, empty,
+ right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
+ * items
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
- int free_space, int right_nritems)
+ int free_space, u32 right_nritems,
+ u32 max_slot)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
slot = path->slots[1];
if (empty)
- nr = right_nritems;
+ nr = min(right_nritems, max_slot);
else
- nr = right_nritems - 1;
+ nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
+ * items
*/
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path, int min_data_size,
+ int data_size, int empty, u32 max_slot)
{
struct extent_buffer *right = path->nodes[0];
struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, root, path, data_size,
- empty, left, free_space, right_nritems);
+ return __push_leaf_left(trans, root, path, min_data_size,
+ empty, left, free_space, right_nritems,
+ max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf. A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ * A B C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves. If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ int data_size)
+{
+ int ret;
+ int progress = 0;
+ int slot;
+ u32 nritems;
+
+ slot = path->slots[0];
+
+ /*
+ * try to push all the items after our slot into the
+ * right leaf
+ */
+ ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+ * our goal is to get our slot at the start or end of a leaf. If
+ * we've done so we're done
+ */
+ if (path->slots[0] == 0 || path->slots[0] == nritems)
+ return 0;
+
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+
+ /* try to push all the items before our slot into the next leaf */
+ slot = path->slots[0];
+ ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ if (progress)
+ return 0;
+ return 1;
+}
+
+/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int wret;
int split;
int num_doubles = 0;
+ int tried_avoid_double = 0;
l = path->nodes[0];
slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
- if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
- wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (data_size) {
+ wret = push_leaf_right(trans, root, path, data_size,
+ data_size, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size, 0);
+ wret = push_leaf_left(trans, root, path, data_size,
+ data_size, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -2923,6 +3003,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2;
}
}
@@ -2939,6 +3021,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2 ;
}
}
@@ -3019,6 +3103,13 @@ again:
}
return ret;
+
+push_for_double:
+ push_for_double_split(trans, root, path, data_size);
+ tried_avoid_double = 1;
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+ goto again;
}
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
extent_buffer_get(leaf);
btrfs_set_path_blocking(path);
- wret = push_leaf_left(trans, root, path, 1, 1);
+ wret = push_leaf_left(trans, root, path, 1, 1,
+ 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1, 1);
+ wret = push_leaf_right(trans, root, path, 1,
+ 1, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec5..d74e6af9b53 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = inode->i_mapping->backing_dev_info,
.sync_mode = mode,
.older_than_this = NULL,
.nr_to_write = nr_pages * 2,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4dbaf89b133..9254b3d58db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
*/
/* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE))
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
/* determine range to clone */
ret = -EINVAL;
- if (off >= src->i_size || off + len > src->i_size)
+ if (off + len > src->i_size || off + len < off)
goto out_unlock;
if (len == 0)
olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 disko = 0, diskl = 0;
u64 datao = 0, datal = 0;
u8 comp;
+ u64 endoff;
size = btrfs_item_size_nr(leaf, slot);
read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_release_path(root, path);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- if (new_key.offset + datal > inode->i_size)
- btrfs_i_size_write(inode,
- new_key.offset + datal);
+
+ /*
+ * we round up to the block size at eof when
+ * determining which extents to clone above,
+ * but shouldn't round up the file size
+ */
+ endoff = new_key.offset + datal;
+ if (endoff > off+olen)
+ endoff = off+olen;
+ if (endoff > inode->i_size)
+ btrfs_i_size_write(inode, endoff);
+
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 04b8280582a..bc87b9c1d27 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -2,7 +2,7 @@ config CEPH_FS
tristate "Ceph distributed file system (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
select LIBCRC32C
- select CONFIG_CRYPTO_AES
+ select CRYPTO_AES
help
Choose Y or M here to include support for mounting the
experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 83d4d2785ff..6d44053ecff 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -493,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
- op = le32_to_cpu(head->op);
+ op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
@@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
+ if (xi->auth_authorizer.buf)
+ ceph_buffer_put(xi->auth_authorizer.buf);
+
kfree(ac->private);
ac->private = NULL;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 619b61655ee..b81be9a5648 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
struct ceph_cap *cap = NULL;
/* temporary, until we do something about cap import/export */
- if (!ctx)
- return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ caps_use_count++;
+ caps_total_count++;
+ }
+ return cap;
+ }
spin_lock(&caps_list_lock);
dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
@@ -621,7 +627,7 @@ retry:
if (fmode >= 0)
__ceph_get_fmode(ci, fmode);
spin_unlock(&inode->i_lock);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return 0;
}
@@ -1175,7 +1181,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
}
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return delayed;
}
@@ -2147,7 +2153,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
else if (flushsnaps)
ceph_flush_snaps(ci);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (put)
iput(inode);
}
@@ -2223,7 +2229,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
iput(inode);
} else if (complete_capsnap) {
ceph_flush_snaps(ci);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
}
if (drop_capsnap)
iput(inode);
@@ -2399,7 +2405,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
if (queue_invalidate)
ceph_queue_invalidate(inode);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (check_caps == 1)
ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
@@ -2454,7 +2460,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
struct ceph_inode_info,
i_flushing_item)->vfs_inode);
mdsc->num_cap_flushing--;
- wake_up(&mdsc->cap_flushing_wq);
+ wake_up_all(&mdsc->cap_flushing_wq);
dout(" inode %p now !flushing\n", inode);
if (ci->i_dirty_caps == 0) {
@@ -2466,7 +2472,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
}
}
spin_unlock(&mdsc->cap_dirty_lock);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
out:
spin_unlock(&inode->i_lock);
@@ -2886,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
int ret = 0;
- int used = 0;
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
- dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
- mds, ceph_cap_string(used), ceph_cap_string(drop),
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
ceph_cap_string(unless));
- /* only drop unused caps */
- drop &= ~used;
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
@@ -2977,6 +2984,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
memcpy(*p, dentry->d_name.name, dentry->d_name.len);
*p += dentry->d_name.len;
rel->dname_seq = cpu_to_le32(di->lease_seq);
+ __ceph_mdsc_drop_dentry_lease(dentry);
}
spin_unlock(&dentry->d_lock);
return ret;
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb654..a4eec133258 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
- dprintk("choose %d x=%d r=%d\n", in->id, x, r);
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
*/
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
{
- if (weight[item] >= 0x1000)
+ if (weight[item] >= 0x10000)
return 0;
if (weight[item] == 0)
return 1;
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map,
int itemtype;
int collide, reject;
const int orig_tries = 5; /* attempts before we fall back to search */
- dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos);
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map,
BUG_ON(item >= 0 ||
(-1-item) >= map->max_buckets);
in = map->buckets[-1-item];
+ retry_bucket = 1;
continue;
}
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map,
}
}
- if (recurse_to_leaf &&
- item < 0 &&
- crush_choose(map, map->buckets[-1-item],
- weight,
- x, outpos+1, 0,
- out2, outpos,
- firstn, 0, NULL) <= outpos) {
- reject = 1;
- } else {
+ reject = 0;
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ if (crush_choose(map,
+ map->buckets[-1-item],
+ weight,
+ x, outpos+1, 0,
+ out2, outpos,
+ firstn, 0,
+ NULL) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
@@ -424,12 +437,12 @@ reject:
continue;
}
- dprintk("choose got %d\n", item);
+ dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
}
- dprintk("choose returns %d\n", outpos);
+ dprintk("CHOOSE returns %d\n", outpos);
return outpos;
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066c..f2f5332ddbb 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp)
static int caps_show(struct seq_file *s, void *p)
{
- struct ceph_client *client = p;
+ struct ceph_client *client = s->private;
int total, avail, used, reserved, min;
ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f85719310db..f94ed3c7f6a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -266,6 +266,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
spin_lock(&inode->i_lock);
if ((filp->f_pos == 2 || fi->dentry) &&
!ceph_test_opt(client, NOASYNCREADDIR) &&
+ ceph_snap(inode) != CEPH_SNAPDIR &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
err = __dcache_readdir(filp, dirent, filldir);
@@ -1013,18 +1014,22 @@ out_touch:
/*
* When a dentry is released, clear the dir I_COMPLETE if it was part
- * of the current dir gen.
+ * of the current dir gen or if this is in the snapshot namespace.
*/
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
struct inode *parent_inode = dentry->d_parent->d_inode;
+ u64 snapid = ceph_snap(parent_inode);
- if (parent_inode) {
+ dout("dentry_release %p parent %p\n", dentry, parent_inode);
+
+ if (parent_inode && snapid != CEPH_SNAPDIR) {
struct ceph_inode_info *ci = ceph_inode(parent_inode);
spin_lock(&parent_inode->i_lock);
- if (ci->i_shared_gen == di->lease_shared_gen) {
+ if (ci->i_shared_gen == di->lease_shared_gen ||
+ snapid <= CEPH_MAXSNAP) {
dout(" clearing %p complete (d_release)\n",
parent_inode);
ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
@@ -1241,7 +1246,9 @@ struct dentry_operations ceph_dentry_ops = {
struct dentry_operations ceph_snapdir_dentry_ops = {
.d_revalidate = ceph_snapdir_d_revalidate,
+ .d_release = ceph_dentry_release,
};
struct dentry_operations ceph_snap_dentry_ops = {
+ .d_release = ceph_dentry_release,
};
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6251a1574b9..7c08698fad3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -265,7 +265,7 @@ int ceph_release(struct inode *inode, struct file *file)
kmem_cache_free(ceph_file_cachep, cf);
/* wake up anyone waiting for caps on this inode */
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
return 0;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index ab47f46ca28..389f9dbd994 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
d_drop(dn);
realdn = d_materialise_unique(dn, in);
if (IS_ERR(realdn)) {
- pr_err("splice_dentry error %p inode %p ino %llx.%llx\n",
- dn, in, ceph_vinop(in));
+ pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+ PTR_ERR(realdn), dn, in, ceph_vinop(in));
if (prehash)
*prehash = false; /* don't rehash on error */
dn = realdn; /* note realdn contains the error */
@@ -1199,8 +1199,10 @@ retry_lookup:
goto out;
}
err = ceph_init_dentry(dn);
- if (err < 0)
+ if (err < 0) {
+ dput(dn);
goto out;
+ }
} else if (dn->d_inode &&
(ceph_ino(dn->d_inode) != vino.ino ||
ceph_snap(dn->d_inode) != vino.snap)) {
@@ -1234,18 +1236,23 @@ retry_lookup:
goto out;
}
dn = splice_dentry(dn, in, NULL);
+ if (IS_ERR(dn))
+ dn = NULL;
}
if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
- dput(dn);
- continue;
+ goto next_item;
}
- update_dentry_lease(dn, rinfo->dir_dlease[i],
- req->r_session, req->r_request_started);
- dput(dn);
+ if (dn)
+ update_dentry_lease(dn, rinfo->dir_dlease[i],
+ req->r_session,
+ req->r_request_started);
+next_item:
+ if (dn)
+ dput(dn);
}
req->r_did_prepopulate = true;
@@ -1494,7 +1501,7 @@ retry:
if (wrbuffer_refs == 0)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
if (wake)
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1766947fc07..dd440bd438a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -868,7 +868,7 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
{
struct ceph_inode_info *ci = ceph_inode(inode);
- wake_up(&ci->i_cap_wq);
+ wake_up_all(&ci->i_cap_wq);
if (arg) {
spin_lock(&inode->i_lock);
ci->i_wanted_max_size = 0;
@@ -1514,6 +1514,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_filepath(&p, end, ino1, path1);
ceph_encode_filepath(&p, end, ino2, path2);
+ /* make note of release offset, in case we need to replay */
+ req->r_request_release_offset = p - msg->front.iov_base;
+
/* cap releases */
releases = 0;
if (req->r_inode_drop)
@@ -1561,7 +1564,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
if (req->r_callback)
req->r_callback(mdsc, req);
else
- complete(&req->r_completion);
+ complete_all(&req->r_completion);
}
/*
@@ -1580,6 +1583,32 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
+ if (req->r_got_unsafe) {
+ /*
+ * Replay. Do not regenerate message (and rebuild
+ * paths, etc.); just use the original message.
+ * Rebuilding paths will break for renames because
+ * d_move mangles the src name.
+ */
+ msg = req->r_request;
+ rhead = msg->front.iov_base;
+
+ flags = le32_to_cpu(rhead->flags);
+ flags |= CEPH_MDS_FLAG_REPLAY;
+ rhead->flags = cpu_to_le32(flags);
+
+ if (req->r_target_inode)
+ rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+ rhead->num_retry = req->r_attempts - 1;
+
+ /* remove cap/dentry releases from message */
+ rhead->num_releases = 0;
+ msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+ msg->front.iov_len = req->r_request_release_offset;
+ return 0;
+ }
+
if (req->r_request) {
ceph_msg_put(req->r_request);
req->r_request = NULL;
@@ -1601,13 +1630,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
rhead->flags = cpu_to_le32(flags);
rhead->num_fwd = req->r_num_fwd;
rhead->num_retry = req->r_attempts - 1;
+ rhead->ino = 0;
dout(" r_locked_dir = %p\n", req->r_locked_dir);
-
- if (req->r_target_inode && req->r_got_unsafe)
- rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
- else
- rhead->ino = 0;
return 0;
}
@@ -1907,7 +1932,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete(&req->r_safe_completion);
+ complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -1922,7 +1947,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
- complete(&mdsc->safe_umount_waiters);
+ complete_all(&mdsc->safe_umount_waiters);
mutex_unlock(&mdsc->mutex);
goto out;
}
@@ -2101,7 +2126,7 @@ static void handle_session(struct ceph_mds_session *session,
pr_info("mds%d reconnect denied\n", session->s_mds);
remove_session_caps(session);
wake = 1; /* for good measure */
- complete(&mdsc->session_close_waiters);
+ complete_all(&mdsc->session_close_waiters);
kick_requests(mdsc, mds);
break;
@@ -2783,6 +2808,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
drop_leases(mdsc);
ceph_flush_dirty_caps(mdsc);
wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index b292fa42a66..952410c60d0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -188,6 +188,7 @@ struct ceph_mds_request {
int r_old_inode_drop, r_old_inode_unless;
struct ceph_msg *r_request; /* original request */
+ int r_request_release_offset;
struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info;
int r_err;
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 64b8b1f7863..15167b2daa5 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con);
* nicely render a sockaddr as a string.
*/
#define MAX_ADDR_STR 20
-static char addr_str[MAX_ADDR_STR][40];
+#define MAX_ADDR_STR_LEN 60
+static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
static DEFINE_SPINLOCK(addr_str_lock);
static int last_addr_str;
@@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss)
int i;
char *s;
struct sockaddr_in *in4 = (void *)ss;
- unsigned char *quad = (void *)&in4->sin_addr.s_addr;
struct sockaddr_in6 *in6 = (void *)ss;
spin_lock(&addr_str_lock);
@@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss)
switch (ss->ss_family) {
case AF_INET:
- sprintf(s, "%u.%u.%u.%u:%u",
- (unsigned int)quad[0],
- (unsigned int)quad[1],
- (unsigned int)quad[2],
- (unsigned int)quad[3],
- (unsigned int)ntohs(in4->sin_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr,
+ (unsigned int)ntohs(in4->sin_port));
break;
case AF_INET6:
- sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u",
- in6->sin6_addr.s6_addr16[0],
- in6->sin6_addr.s6_addr16[1],
- in6->sin6_addr.s6_addr16[2],
- in6->sin6_addr.s6_addr16[3],
- in6->sin6_addr.s6_addr16[4],
- in6->sin6_addr.s6_addr16[5],
- in6->sin6_addr.s6_addr16[6],
- in6->sin6_addr.s6_addr16[7],
- (unsigned int)ntohs(in6->sin6_port));
+ snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr,
+ (unsigned int)ntohs(in6->sin6_port));
break;
default:
@@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock,
*/
static struct socket *ceph_tcp_connect(struct ceph_connection *con)
{
- struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr;
+ struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
int ret;
BUG_ON(con->sock);
- ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
if (ret)
return ERR_PTR(ret);
con->sock = sock;
@@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
- ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK);
+ ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
pr_addr(&con->peer_addr.in_addr),
@@ -657,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT;
+ con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end,
struct sockaddr_in *in4 = (void *)ss;
struct sockaddr_in6 *in6 = (void *)ss;
int port;
+ char delim = ',';
+
+ if (*p == '[') {
+ delim = ']';
+ p++;
+ }
memset(ss, 0, sizeof(*ss));
if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
- ',', &ipend)) {
+ delim, &ipend))
ss->ss_family = AF_INET;
- } else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
- ',', &ipend)) {
+ else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
+ delim, &ipend))
ss->ss_family = AF_INET6;
- } else {
+ else
goto bad;
- }
p = ipend;
+ if (delim == ']') {
+ if (*p != ']') {
+ dout("missing matching ']'\n");
+ goto bad;
+ }
+ p++;
+ }
+
/* port? */
if (p < end && *p == ':') {
port = 0;
@@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end,
return 0;
bad:
- pr_err("parse_ips bad ip '%s'\n", c);
+ pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
return -EINVAL;
}
@@ -1396,10 +1399,12 @@ static int read_partial_message(struct ceph_connection *con)
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
+ skip = 0;
con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
+ BUG_ON(con->in_msg);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
@@ -2013,20 +2018,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
{
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p\n", con, msg);
+ dout("con_revoke %p msg %p - was on queue\n", con, msg);
list_del_init(&msg->list_head);
ceph_msg_put(msg);
msg->hdr.seq = 0;
- if (con->out_msg == msg) {
- ceph_msg_put(con->out_msg);
- con->out_msg = NULL;
- }
+ }
+ if (con->out_msg == msg) {
+ dout("con_revoke %p msg %p - was sending\n", con, msg);
+ con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- } else {
- dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg);
+ ceph_msg_put(msg);
+ msg->hdr.seq = 0;
}
mutex_unlock(&con->mutex);
}
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 07a539906e6..54fe01c5070 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -345,7 +345,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
out:
mutex_unlock(&monc->mutex);
- wake_up(&client->auth_wq);
+ wake_up_all(&client->auth_wq);
}
/*
@@ -462,7 +462,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc,
}
mutex_unlock(&monc->mutex);
if (req) {
- complete(&req->completion);
+ complete_all(&req->completion);
put_generic_request(req);
}
return;
@@ -718,14 +718,15 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
monc->m_auth->front_max);
if (ret < 0) {
monc->client->auth_err = ret;
- wake_up(&monc->client->auth_wq);
+ wake_up_all(&monc->client->auth_wq);
} else if (ret > 0) {
__send_prepared_auth_request(monc, ret);
} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num = monc->auth->global_id;
+ monc->client->msgr->inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
__resend_generic_request(monc);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index d25b4add85b..e3852234789 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -862,12 +862,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
if (req->r_callback)
req->r_callback(req, msg);
else
- complete(&req->r_completion);
+ complete_all(&req->r_completion);
if (flags & CEPH_OSD_FLAG_ONDISK) {
if (req->r_safe_callback)
req->r_safe_callback(req, msg);
- complete(&req->r_safe_completion); /* fsync waiter */
+ complete_all(&req->r_safe_completion); /* fsync waiter */
}
done:
@@ -1083,7 +1083,7 @@ done:
if (newmap)
kick_requests(osdc, NULL);
up_read(&osdc->map_sem);
- wake_up(&osdc->client->auth_wq);
+ wake_up_all(&osdc->client->auth_wq);
return;
bad:
@@ -1344,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
int type = le16_to_cpu(msg->hdr.type);
if (!osd)
- return;
+ goto out;
osdc = osd->o_osdc;
switch (type) {
@@ -1359,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
+out:
ceph_msg_put(msg);
}
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index ddc656fb5c0..416d46adbf8 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -568,6 +568,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
if (ev > CEPH_PG_POOL_VERSION) {
pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
ev, CEPH_PG_POOL_VERSION);
+ kfree(pi);
goto bad;
}
__decode_pool(p, pi);
@@ -707,6 +708,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush))
return ERR_CAST(newcrush);
+ *p += len;
}
/* new flags? */
@@ -829,12 +831,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
/* remove any? */
while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
node)->pgid, pgid) <= 0) {
- struct rb_node *cur = rbp;
+ struct ceph_pg_mapping *cur =
+ rb_entry(rbp, struct ceph_pg_mapping, node);
+
rbp = rb_next(rbp);
- dout(" removed pg_temp %llx\n",
- *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
- node)->pgid);
- rb_erase(cur, &map->pg_temp);
+ dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
+ rb_erase(&cur->node, &map->pg_temp);
+ kfree(cur);
}
if (pglen) {
@@ -850,19 +853,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
for (j = 0; j < pglen; j++)
pg->osds[j] = ceph_decode_32(p);
err = __insert_pg_mapping(pg, &map->pg_temp);
- if (err)
+ if (err) {
+ kfree(pg);
goto bad;
+ }
dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
pglen);
}
}
while (rbp) {
- struct rb_node *cur = rbp;
+ struct ceph_pg_mapping *cur =
+ rb_entry(rbp, struct ceph_pg_mapping, node);
+
rbp = rb_next(rbp);
- dout(" removed pg_temp %llx\n",
- *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
- node)->pgid);
- rb_erase(cur, &map->pg_temp);
+ dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
+ rb_erase(&cur->node, &map->pg_temp);
+ kfree(cur);
}
/* ignore the rest */
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 80f35259680..5739fd7f88b 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -131,6 +131,15 @@ config CIFS_DFS_UPCALL
IP addresses) which is needed for implicit mounts of DFS junction
points. If unsure, say N.
+config CIFS_FSCACHE
+ bool "Provide CIFS client caching support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
+ help
+ Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
+ to be cached locally on disk through the general filesystem cache
+ manager. If unsure, say N.
+
config CIFS_EXPERIMENTAL
bool "CIFS Experimental Features (EXPERIMENTAL)"
depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 9948c0030e8..adefa60a9bd 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -11,3 +11,5 @@ cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
+
+cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
new file mode 100644
index 00000000000..224d7bbd1fc
--- /dev/null
+++ b/fs/cifs/cache.c
@@ -0,0 +1,331 @@
+/*
+ * fs/cifs/cache.c - CIFS filesystem cache index structure definitions
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "fscache.h"
+#include "cifs_debug.h"
+
+/*
+ * CIFS filesystem definition for FS-Cache
+ */
+struct fscache_netfs cifs_fscache_netfs = {
+ .name = "cifs",
+ .version = 0,
+};
+
+/*
+ * Register CIFS for caching with FS-Cache
+ */
+int cifs_fscache_register(void)
+{
+ return fscache_register_netfs(&cifs_fscache_netfs);
+}
+
+/*
+ * Unregister CIFS for caching
+ */
+void cifs_fscache_unregister(void)
+{
+ fscache_unregister_netfs(&cifs_fscache_netfs);
+}
+
+/*
+ * Key layout of CIFS server cache index object
+ */
+struct cifs_server_key {
+ uint16_t family; /* address family */
+ uint16_t port; /* IP port */
+ union {
+ struct in_addr ipv4_addr;
+ struct in6_addr ipv6_addr;
+ } addr[0];
+};
+
+/*
+ * Server object keyed by {IPaddress,port,family} tuple
+ */
+static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct TCP_Server_Info *server = cookie_netfs_data;
+ const struct sockaddr *sa = (struct sockaddr *) &server->addr.sockAddr;
+ struct cifs_server_key *key = buffer;
+ uint16_t key_len = sizeof(struct cifs_server_key);
+
+ memset(key, 0, key_len);
+
+ /*
+ * Should not be a problem as sin_family/sin6_family overlays
+ * sa_family field
+ */
+ switch (sa->sa_family) {
+ case AF_INET:
+ key->family = server->addr.sockAddr.sin_family;
+ key->port = server->addr.sockAddr.sin_port;
+ key->addr[0].ipv4_addr = server->addr.sockAddr.sin_addr;
+ key_len += sizeof(key->addr[0].ipv4_addr);
+ break;
+
+ case AF_INET6:
+ key->family = server->addr.sockAddr6.sin6_family;
+ key->port = server->addr.sockAddr6.sin6_port;
+ key->addr[0].ipv6_addr = server->addr.sockAddr6.sin6_addr;
+ key_len += sizeof(key->addr[0].ipv6_addr);
+ break;
+
+ default:
+ cERROR(1, "CIFS: Unknown network family '%d'", sa->sa_family);
+ key_len = 0;
+ break;
+ }
+
+ return key_len;
+}
+
+/*
+ * Server object for FS-Cache
+ */
+const struct fscache_cookie_def cifs_fscache_server_index_def = {
+ .name = "CIFS.server",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = cifs_server_get_key,
+};
+
+/*
+ * Auxiliary data attached to CIFS superblock within the cache
+ */
+struct cifs_fscache_super_auxdata {
+ u64 resource_id; /* unique server resource id */
+};
+
+static char *extract_sharename(const char *treename)
+{
+ const char *src;
+ char *delim, *dst;
+ int len;
+
+ /* skip double chars at the beginning */
+ src = treename + 2;
+
+ /* share name is always preceded by '\\' now */
+ delim = strchr(src, '\\');
+ if (!delim)
+ return ERR_PTR(-EINVAL);
+ delim++;
+ len = strlen(delim);
+
+ /* caller has to free the memory */
+ dst = kstrndup(delim, len, GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ return dst;
+}
+
+/*
+ * Superblock object currently keyed by share name
+ */
+static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+ char *sharename;
+ uint16_t len;
+
+ sharename = extract_sharename(tcon->treeName);
+ if (IS_ERR(sharename)) {
+ cFYI(1, "CIFS: couldn't extract sharename\n");
+ sharename = NULL;
+ return 0;
+ }
+
+ len = strlen(sharename);
+ if (len > maxbuf)
+ return 0;
+
+ memcpy(buffer, sharename, len);
+
+ kfree(sharename);
+
+ return len;
+}
+
+static uint16_t
+cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ struct cifs_fscache_super_auxdata auxdata;
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.resource_id = tcon->resource_id;
+
+ if (maxbuf > sizeof(auxdata))
+ maxbuf = sizeof(auxdata);
+
+ memcpy(buffer, &auxdata, maxbuf);
+
+ return maxbuf;
+}
+
+static enum
+fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct cifs_fscache_super_auxdata auxdata;
+ const struct cifsTconInfo *tcon = cookie_netfs_data;
+
+ if (datalen != sizeof(auxdata))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.resource_id = tcon->resource_id;
+
+ if (memcmp(data, &auxdata, datalen) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+/*
+ * Superblock object for FS-Cache
+ */
+const struct fscache_cookie_def cifs_fscache_super_index_def = {
+ .name = "CIFS.super",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = cifs_super_get_key,
+ .get_aux = cifs_fscache_super_get_aux,
+ .check_aux = cifs_fscache_super_check_aux,
+};
+
+/*
+ * Auxiliary data attached to CIFS inode within the cache
+ */
+struct cifs_fscache_inode_auxdata {
+ struct timespec last_write_time;
+ struct timespec last_change_time;
+ u64 eof;
+};
+
+static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+ uint16_t keylen;
+
+ /* use the UniqueId as the key */
+ keylen = sizeof(cifsi->uniqueid);
+ if (keylen > maxbuf)
+ keylen = 0;
+ else
+ memcpy(buffer, &cifsi->uniqueid, keylen);
+
+ return keylen;
+}
+
+static void
+cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size)
+{
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ *size = cifsi->vfs_inode.i_size;
+}
+
+static uint16_t
+cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+ auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
+ if (maxbuf > sizeof(auxdata))
+ maxbuf = sizeof(auxdata);
+
+ memcpy(buffer, &auxdata, maxbuf);
+
+ return maxbuf;
+}
+
+static enum
+fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+ struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ if (datalen != sizeof(auxdata))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.eof = cifsi->server_eof;
+ auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+ auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
+ if (memcmp(data, &auxdata, datalen) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data)
+{
+ struct cifsInodeInfo *cifsi = cookie_netfs_data;
+ struct pagevec pvec;
+ pgoff_t first;
+ int loop, nr_pages;
+
+ pagevec_init(&pvec, 0);
+ first = 0;
+
+ cFYI(1, "cifs inode 0x%p now uncached", cifsi);
+
+ for (;;) {
+ nr_pages = pagevec_lookup(&pvec,
+ cifsi->vfs_inode.i_mapping, first,
+ PAGEVEC_SIZE - pagevec_count(&pvec));
+ if (!nr_pages)
+ break;
+
+ for (loop = 0; loop < nr_pages; loop++)
+ ClearPageFsCache(pvec.pages[loop]);
+
+ first = pvec.pages[nr_pages - 1]->index + 1;
+
+ pvec.nr = nr_pages;
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+}
+
+const struct fscache_cookie_def cifs_fscache_inode_object_def = {
+ .name = "CIFS.uniqueid",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = cifs_fscache_inode_get_key,
+ .get_attr = cifs_fscache_inode_get_attr,
+ .get_aux = cifs_fscache_inode_get_aux,
+ .check_aux = cifs_fscache_inode_check_aux,
+ .now_uncached = cifs_fscache_inode_now_uncached,
+};
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ac19a6f3dae..dc1ed50ea06 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -230,28 +230,22 @@ compose_mount_options_err:
goto compose_mount_options_out;
}
-
-static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent,
- struct dentry *dentry, const struct dfs_info3_param *ref)
+/**
+ * cifs_dfs_do_refmount - mounts specified path using provided refferal
+ * @cifs_sb: parent/root superblock
+ * @fullpath: full path in UNC format
+ * @ref: server's referral
+ */
+static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
+ const char *fullpath, const struct dfs_info3_param *ref)
{
- struct cifs_sb_info *cifs_sb;
struct vfsmount *mnt;
char *mountdata;
char *devname = NULL;
- char *fullpath;
-
- cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
- /*
- * this function gives us a path with a double backslash prefix. We
- * require a single backslash for DFS.
- */
- fullpath = build_path_from_dentry(dentry);
- if (!fullpath)
- return ERR_PTR(-ENOMEM);
+ /* strip first '\' from fullpath */
mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
fullpath + 1, ref, &devname);
- kfree(fullpath);
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
@@ -357,8 +351,8 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
rc = -EINVAL;
goto out_err;
}
- mnt = cifs_dfs_do_refmount(nd->path.mnt,
- nd->path.dentry, referrals + i);
+ mnt = cifs_dfs_do_refmount(cifs_sb,
+ full_path, referrals + i);
cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
referrals[i].node_name, mnt);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 246a167cb91..9e771450c3b 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -35,6 +35,7 @@
#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */
#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */
#define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/
+#define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */
struct cifs_sb_info {
struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 379bd7d9c05..6effccff85a 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -144,6 +144,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
sprintf(dp, ";uid=0x%x", sesInfo->linux_uid);
dp = description + strlen(description);
+ sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
+
+ dp = description + strlen(description);
sprintf(dp, ";user=%s", sesInfo->userName);
dp = description + strlen(description);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1..8a2cf129e53 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -47,6 +47,7 @@
#include <linux/key-type.h>
#include "dns_resolve.h"
#include "cifs_spnego.h"
+#include "fscache.h"
#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
int cifsFYI = 0;
@@ -329,6 +330,12 @@ cifs_destroy_inode(struct inode *inode)
}
static void
+cifs_clear_inode(struct inode *inode)
+{
+ cifs_fscache_release_inode_cookie(inode);
+}
+
+static void
cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
{
seq_printf(s, ",addr=");
@@ -473,14 +480,25 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
+void cifs_drop_inode(struct inode *inode)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+ return generic_drop_inode(inode);
+
+ return generic_delete_inode(inode);
+}
+
static const struct super_operations cifs_super_ops = {
.put_super = cifs_put_super,
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
.destroy_inode = cifs_destroy_inode,
-/* .drop_inode = generic_delete_inode,
- .delete_inode = cifs_delete_inode, */ /* Do not need above two
- functions unless later we add lazy close of inodes or unless the
+ .drop_inode = cifs_drop_inode,
+ .clear_inode = cifs_clear_inode,
+/* .delete_inode = cifs_delete_inode, */ /* Do not need above
+ function unless later we add lazy close of inodes or unless the
kernel forgets to call us with the same number of releases (closes)
as opens */
.show_options = cifs_show_options,
@@ -892,6 +910,10 @@ init_cifs(void)
cFYI(1, "cifs_max_pending set to max of 256");
}
+ rc = cifs_fscache_register();
+ if (rc)
+ goto out;
+
rc = cifs_init_inodecache();
if (rc)
goto out_clean_proc;
@@ -913,7 +935,7 @@ init_cifs(void)
goto out_unregister_filesystem;
#endif
#ifdef CONFIG_CIFS_DFS_UPCALL
- rc = register_key_type(&key_type_dns_resolver);
+ rc = cifs_init_dns_resolver();
if (rc)
goto out_unregister_key_type;
#endif
@@ -925,7 +947,7 @@ init_cifs(void)
out_unregister_resolver_key:
#ifdef CONFIG_CIFS_DFS_UPCALL
- unregister_key_type(&key_type_dns_resolver);
+ cifs_exit_dns_resolver();
out_unregister_key_type:
#endif
#ifdef CONFIG_CIFS_UPCALL
@@ -941,6 +963,8 @@ init_cifs(void)
cifs_destroy_inodecache();
out_clean_proc:
cifs_proc_clean();
+ cifs_fscache_unregister();
+ out:
return rc;
}
@@ -949,9 +973,10 @@ exit_cifs(void)
{
cFYI(DBG2, "exit_cifs");
cifs_proc_clean();
+ cifs_fscache_unregister();
#ifdef CONFIG_CIFS_DFS_UPCALL
cifs_dfs_release_automount_timer();
- unregister_key_type(&key_type_dns_resolver);
+ cifs_exit_dns_resolver();
#endif
#ifdef CONFIG_CIFS_UPCALL
unregister_key_type(&cifs_spnego_key_type);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a7eb65c84b1..d82f5fb4761 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -114,5 +114,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.64"
+#define CIFS_VERSION "1.65"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a88479ceaad..59906146ad3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -16,6 +16,9 @@
* the GNU Lesser General Public License for more details.
*
*/
+#ifndef _CIFS_GLOB_H
+#define _CIFS_GLOB_H
+
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/slab.h>
@@ -34,7 +37,7 @@
#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */
#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null
termination then *2 for unicode versions */
-#define MAX_PASSWORD_SIZE 16
+#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
#define CIFS_MIN_RCV_POOL 4
@@ -80,8 +83,7 @@ enum statusEnum {
};
enum securityEnum {
- PLAINTXT = 0, /* Legacy with Plaintext passwords */
- LANMAN, /* Legacy LANMAN auth */
+ LANMAN = 0, /* Legacy LANMAN auth */
NTLM, /* Legacy NTLM012 auth with NTLM hash */
NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
@@ -142,7 +144,6 @@ struct TCP_Server_Info {
struct list_head pending_mid_q;
void *Server_NlsInfo; /* BB - placeholder for future NLS info */
unsigned short server_codepage; /* codepage for the server */
- unsigned long ip_address; /* IP addr for the server if known */
enum protocolEnum protocolType;
char versionMajor;
char versionMinor;
@@ -190,19 +191,9 @@ struct TCP_Server_Info {
bool sec_mskerberos; /* supports legacy MS Kerberos */
bool sec_kerberosu2u; /* supports U2U Kerberos */
bool sec_ntlmssp; /* supports NTLMSSP */
-};
-
-/*
- * The following is our shortcut to user information. We surface the uid,
- * and name. We always get the password on the fly in case it
- * has changed. We also hang a list of sessions owned by this user off here.
- */
-struct cifsUidInfo {
- struct list_head userList;
- struct list_head sessionList; /* SMB sessions for this user */
- uid_t linux_uid;
- char user[MAX_USERNAME_SIZE + 1]; /* ascii name of user */
- /* BB may need ptr or callback for PAM or WinBind info */
+#ifdef CONFIG_CIFS_FSCACHE
+ struct fscache_cookie *fscache; /* client index cache cookie */
+#endif
};
/*
@@ -212,9 +203,6 @@ struct cifsSesInfo {
struct list_head smb_ses_list;
struct list_head tcon_list;
struct mutex session_mutex;
-#if 0
- struct cifsUidInfo *uidInfo; /* pointer to user info */
-#endif
struct TCP_Server_Info *server; /* pointer to server info */
int ses_count; /* reference counter */
enum statusEnum status;
@@ -226,7 +214,8 @@ struct cifsSesInfo {
char *serverNOS; /* name of network operating system of server */
char *serverDomain; /* security realm of server */
int Suid; /* remote smb uid */
- uid_t linux_uid; /* local Linux uid */
+ uid_t linux_uid; /* overriding owner of files on the mount */
+ uid_t cred_uid; /* owner of credentials */
int capabilities;
char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
TCP names - will ipv6 and sctp addresses fit? */
@@ -311,6 +300,10 @@ struct cifsTconInfo {
bool local_lease:1; /* check leases (only) on local system not remote */
bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
bool need_reconnect:1; /* connection reset, tid now invalid */
+#ifdef CONFIG_CIFS_FSCACHE
+ u64 resource_id; /* server resource id */
+ struct fscache_cookie *fscache; /* cookie for share */
+#endif
/* BB add field for back pointer to sb struct(s)? */
};
@@ -398,6 +391,9 @@ struct cifsInodeInfo {
bool invalid_mapping:1; /* pagecache is invalid */
u64 server_eof; /* current file size on server */
u64 uniqueid; /* server inode number */
+#ifdef CONFIG_CIFS_FSCACHE
+ struct fscache_cookie *fscache;
+#endif
struct inode vfs_inode;
};
@@ -733,3 +729,5 @@ GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
extern const struct slow_work_ops cifs_oplock_break_ops;
+
+#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb..2eaebbd3113 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -86,7 +86,9 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
extern int decode_negTokenInit(unsigned char *security_blob, int length,
struct TCP_Server_Info *server);
-extern int cifs_convert_address(char *src, void *dst);
+extern int cifs_convert_address(struct sockaddr *dst, char *src);
+extern int cifs_fill_sockaddr(struct sockaddr *dst, char *src,
+ unsigned short int port);
extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
extern void header_assemble(struct smb_hdr *, char /* command */ ,
const struct cifsTconInfo *, int /* length of
@@ -106,7 +108,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
__u16 fileHandle, struct file *file,
struct vfsmount *mnt, unsigned int oflags);
extern int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt,
struct super_block *sb,
int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2208f06e4c4..2a43a0aca96 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -48,6 +48,7 @@
#include "nterr.h"
#include "rfc1002pdu.h"
#include "cn_cifs.h"
+#include "fscache.h"
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -66,6 +67,7 @@ struct smb_vol {
char *iocharset; /* local code page for mapping to and from Unicode */
char source_rfc1001_name[16]; /* netbios name of client */
char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */
+ uid_t cred_uid;
uid_t linux_uid;
gid_t linux_gid;
mode_t file_mode;
@@ -97,6 +99,7 @@ struct smb_vol {
bool noblocksnd:1;
bool noautotune:1;
bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
+ bool fsc:1; /* enable fscache */
unsigned int rsize;
unsigned int wsize;
bool sockopt_tcp_nodelay:1;
@@ -830,7 +833,8 @@ cifs_parse_mount_options(char *options, const char *devname,
/* null target name indicates to use *SMBSERVR default called name
if we end up sending RFC1001 session initialize */
vol->target_rfc1001_name[0] = 0;
- vol->linux_uid = current_uid(); /* use current_euid() instead? */
+ vol->cred_uid = current_uid();
+ vol->linux_uid = current_uid();
vol->linux_gid = current_gid();
/* default to only allowing write access to owner of the mount */
@@ -1257,6 +1261,12 @@ cifs_parse_mount_options(char *options, const char *devname,
} else if ((strnicmp(data, "nocase", 6) == 0) ||
(strnicmp(data, "ignorecase", 10) == 0)) {
vol->nocase = 1;
+ } else if (strnicmp(data, "mand", 4) == 0) {
+ /* ignore */
+ } else if (strnicmp(data, "nomand", 6) == 0) {
+ /* ignore */
+ } else if (strnicmp(data, "_netdev", 7) == 0) {
+ /* ignore */
} else if (strnicmp(data, "brl", 3) == 0) {
vol->nobrl = 0;
} else if ((strnicmp(data, "nobrl", 5) == 0) ||
@@ -1331,6 +1341,8 @@ cifs_parse_mount_options(char *options, const char *devname,
printk(KERN_WARNING "CIFS: Mount option noac not "
"supported. Instead set "
"/proc/fs/cifs/LookupCacheEnabled to 0\n");
+ } else if (strnicmp(data, "fsc", 3) == 0) {
+ vol->fsc = true;
} else
printk(KERN_WARNING "CIFS: Unknown mount option %s\n",
data);
@@ -1380,18 +1392,92 @@ cifs_parse_mount_options(char *options, const char *devname,
return 0;
}
+static bool
+match_address(struct TCP_Server_Info *server, struct sockaddr *addr)
+{
+ struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (addr4->sin_addr.s_addr !=
+ server->addr.sockAddr.sin_addr.s_addr)
+ return false;
+ if (addr4->sin_port &&
+ addr4->sin_port != server->addr.sockAddr.sin_port)
+ return false;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_equal(&addr6->sin6_addr,
+ &server->addr.sockAddr6.sin6_addr))
+ return false;
+ if (addr6->sin6_scope_id !=
+ server->addr.sockAddr6.sin6_scope_id)
+ return false;
+ if (addr6->sin6_port &&
+ addr6->sin6_port != server->addr.sockAddr6.sin6_port)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+static bool
+match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
+{
+ unsigned int secFlags;
+
+ if (vol->secFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
+ secFlags = vol->secFlg;
+ else
+ secFlags = global_secflags | vol->secFlg;
+
+ switch (server->secType) {
+ case LANMAN:
+ if (!(secFlags & (CIFSSEC_MAY_LANMAN|CIFSSEC_MAY_PLNTXT)))
+ return false;
+ break;
+ case NTLMv2:
+ if (!(secFlags & CIFSSEC_MAY_NTLMV2))
+ return false;
+ break;
+ case NTLM:
+ if (!(secFlags & CIFSSEC_MAY_NTLM))
+ return false;
+ break;
+ case Kerberos:
+ if (!(secFlags & CIFSSEC_MAY_KRB5))
+ return false;
+ break;
+ case RawNTLMSSP:
+ if (!(secFlags & CIFSSEC_MAY_NTLMSSP))
+ return false;
+ break;
+ default:
+ /* shouldn't happen */
+ return false;
+ }
+
+ /* now check if signing mode is acceptible */
+ if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
+ (server->secMode & SECMODE_SIGN_REQUIRED))
+ return false;
+ else if (((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) &&
+ (server->secMode &
+ (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED)) == 0)
+ return false;
+
+ return true;
+}
+
static struct TCP_Server_Info *
-cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
+cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
{
- struct list_head *tmp;
struct TCP_Server_Info *server;
- struct sockaddr_in *addr4 = (struct sockaddr_in *) addr;
- struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) addr;
write_lock(&cifs_tcp_ses_lock);
- list_for_each(tmp, &cifs_tcp_ses_list) {
- server = list_entry(tmp, struct TCP_Server_Info,
- tcp_ses_list);
+ list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
/*
* the demux thread can exit on its own while still in CifsNew
* so don't accept any sockets in that state. Since the
@@ -1401,37 +1487,11 @@ cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
if (server->tcpStatus == CifsNew)
continue;
- switch (addr->ss_family) {
- case AF_INET:
- if (addr4->sin_addr.s_addr ==
- server->addr.sockAddr.sin_addr.s_addr) {
- addr4->sin_port = htons(port);
- /* user overrode default port? */
- if (addr4->sin_port) {
- if (addr4->sin_port !=
- server->addr.sockAddr.sin_port)
- continue;
- }
- break;
- } else
- continue;
+ if (!match_address(server, addr))
+ continue;
- case AF_INET6:
- if (ipv6_addr_equal(&addr6->sin6_addr,
- &server->addr.sockAddr6.sin6_addr) &&
- (addr6->sin6_scope_id ==
- server->addr.sockAddr6.sin6_scope_id)) {
- addr6->sin6_port = htons(port);
- /* user overrode default port? */
- if (addr6->sin6_port) {
- if (addr6->sin6_port !=
- server->addr.sockAddr6.sin6_port)
- continue;
- }
- break;
- } else
- continue;
- }
+ if (!match_security(server, vol))
+ continue;
++server->srv_count;
write_unlock(&cifs_tcp_ses_lock);
@@ -1460,6 +1520,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
+ cifs_fscache_release_client_cookie(server);
+
task = xchg(&server->tsk, NULL);
if (task)
force_sig(SIGKILL, task);
@@ -1479,7 +1541,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip);
if (volume_info->UNCip && volume_info->UNC) {
- rc = cifs_convert_address(volume_info->UNCip, &addr);
+ rc = cifs_fill_sockaddr((struct sockaddr *)&addr,
+ volume_info->UNCip,
+ volume_info->port);
if (!rc) {
/* we failed translating address */
rc = -EINVAL;
@@ -1499,7 +1563,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
}
/* see if we already have a matching tcp_ses */
- tcp_ses = cifs_find_tcp_session(&addr, volume_info->port);
+ tcp_ses = cifs_find_tcp_session((struct sockaddr *)&addr, volume_info);
if (tcp_ses)
return tcp_ses;
@@ -1543,12 +1607,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cFYI(1, "attempting ipv6 connect");
/* BB should we allow ipv6 on port 139? */
/* other OS never observed in Wild doing 139 with v6 */
- sin_server6->sin6_port = htons(volume_info->port);
memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
sizeof(struct sockaddr_in6));
rc = ipv6_connect(tcp_ses);
} else {
- sin_server->sin_port = htons(volume_info->port);
memcpy(&tcp_ses->addr.sockAddr, sin_server,
sizeof(struct sockaddr_in));
rc = ipv4_connect(tcp_ses);
@@ -1577,6 +1639,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
write_unlock(&cifs_tcp_ses_lock);
+ cifs_fscache_get_client_cookie(tcp_ses);
+
return tcp_ses;
out_err:
@@ -1591,17 +1655,27 @@ out_err:
}
static struct cifsSesInfo *
-cifs_find_smb_ses(struct TCP_Server_Info *server, char *username)
+cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
{
- struct list_head *tmp;
struct cifsSesInfo *ses;
write_lock(&cifs_tcp_ses_lock);
- list_for_each(tmp, &server->smb_ses_list) {
- ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list);
- if (strncmp(ses->userName, username, MAX_USERNAME_SIZE))
- continue;
-
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ switch (server->secType) {
+ case Kerberos:
+ if (vol->cred_uid != ses->cred_uid)
+ continue;
+ break;
+ default:
+ /* anything else takes username/password */
+ if (strncmp(ses->userName, vol->username,
+ MAX_USERNAME_SIZE))
+ continue;
+ if (strlen(vol->username) != 0 &&
+ strncmp(ses->password, vol->password,
+ MAX_PASSWORD_SIZE))
+ continue;
+ }
++ses->ses_count;
write_unlock(&cifs_tcp_ses_lock);
return ses;
@@ -1643,7 +1717,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
xid = GetXid();
- ses = cifs_find_smb_ses(server, volume_info->username);
+ ses = cifs_find_smb_ses(server, volume_info);
if (ses) {
cFYI(1, "Existing smb sess found (status=%d)", ses->status);
@@ -1706,6 +1780,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
if (ses->domainName)
strcpy(ses->domainName, volume_info->domainname);
}
+ ses->cred_uid = volume_info->cred_uid;
ses->linux_uid = volume_info->linux_uid;
ses->overrideSecFlg = volume_info->secFlg;
@@ -1773,6 +1848,7 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
CIFSSMBTDis(xid, tcon);
_FreeXid(xid);
+ cifs_fscache_release_super_cookie(tcon);
tconInfoFree(tcon);
cifs_put_smb_ses(ses);
}
@@ -1843,6 +1919,8 @@ cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info)
list_add(&tcon->tcon_list, &ses->tcon_list);
write_unlock(&cifs_tcp_ses_lock);
+ cifs_fscache_get_super_cookie(tcon);
+
return tcon;
out_fail:
@@ -2397,6 +2475,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
if (pvolume_info->dynperm)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
+ if (pvolume_info->fsc)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
if (pvolume_info->direct_io) {
cFYI(1, "mounting share using direct i/o");
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461c..a7de5e9fff1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/file.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
@@ -129,12 +130,6 @@ cifs_bp_rename_retry:
return full_path;
}
-/*
- * When called with struct file pointer set to NULL, there is no way we could
- * update file->private_data, but getting it stuck on openFileList provides a
- * way to access it from cifs_fill_filedata and thereby set file->private_data
- * from cifs_open.
- */
struct cifsFileInfo *
cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
struct file *file, struct vfsmount *mnt, unsigned int oflags)
@@ -184,12 +179,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
}
write_unlock(&GlobalSMBSeslock);
+ file->private_data = pCifsFile;
+
return pCifsFile;
}
int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt, struct super_block *sb,
- int mode, int oflags,
+ struct super_block *sb, int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid)
{
int rc;
@@ -258,19 +254,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
cifs_fattr_to_inode(*pinode, &fattr);
}
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
- * file->private_data.
- */
- if (mnt) {
- struct cifsFileInfo *pfile_info;
-
- pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
- oflags);
- if (pfile_info == NULL)
- rc = -ENOMEM;
- }
-
posix_open_ret:
kfree(presp_data);
return rc;
@@ -298,7 +281,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
int create_options = CREATE_NOT_DIR;
__u32 oplock = 0;
int oflags;
- bool posix_create = false;
/*
* BB below access is probably too much for mknod to request
* but we have to do query and setpathinfo so requesting
@@ -339,7 +321,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_open(full_path, &newinode,
- nd ? nd->path.mnt : NULL,
inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
/* EIO could indicate that (posix open) operation is not
supported, despite what server claimed in capability
@@ -347,7 +328,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
handled in posix open */
if (rc == 0) {
- posix_create = true;
if (newinode == NULL) /* query inode info */
goto cifs_create_get_file_info;
else /* success, no need to query */
@@ -478,21 +458,28 @@ cifs_create_set_dentry:
else
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
- /* nfsd case - nfs srv does not set nd */
- if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
- /* mknod case - do not leave file open */
- CIFSSMBClose(xid, tcon, fileHandle);
- } else if (!(posix_create) && (newinode)) {
+ if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
struct cifsFileInfo *pfile_info;
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo
- * pointer to file->private_data.
- */
- pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+ struct file *filp;
+
+ filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
+ goto cifs_create_out;
+ }
+
+ pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
nd->path.mnt, oflags);
- if (pfile_info == NULL)
+ if (pfile_info == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
rc = -ENOMEM;
+ }
+ } else {
+ CIFSSMBClose(xid, tcon, fileHandle);
}
+
cifs_create_out:
kfree(buf);
kfree(full_path);
@@ -636,6 +623,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
bool posix_open = false;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
+ struct cifsFileInfo *cfile;
struct inode *newInode = NULL;
char *full_path = NULL;
struct file *filp;
@@ -703,7 +691,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.flags & O_CREAT)) {
- rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+ rc = cifs_posix_open(full_path, &newInode,
parent_dir_inode->i_sb,
nd->intent.open.create_mode,
nd->intent.open.flags, &oplock,
@@ -733,8 +721,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
else
direntry->d_op = &cifs_dentry_ops;
d_add(direntry, newInode);
- if (posix_open)
- filp = lookup_instantiate_filp(nd, direntry, NULL);
+ if (posix_open) {
+ filp = lookup_instantiate_filp(nd, direntry,
+ generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ goto lookup_out;
+ }
+
+ cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+ nd->path.mnt,
+ nd->intent.open.flags);
+ if (cfile == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ rc = -ENOMEM;
+ goto lookup_out;
+ }
+ }
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
@@ -755,6 +760,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
is a common return code */
}
+lookup_out:
kfree(full_path);
FreeXid(xid);
return ERR_PTR(rc);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 4db2c5e7283..3ad7f4300c4 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -24,12 +24,16 @@
*/
#include <linux/slab.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
#include <keys/user-type.h>
#include "dns_resolve.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_debug.h"
+static const struct cred *dns_resolver_cache;
+
/* Checks if supplied name is IP address
* returns:
* 1 - name is IP
@@ -40,7 +44,7 @@ is_ip(char *name)
{
struct sockaddr_storage ss;
- return cifs_convert_address(name, &ss);
+ return cifs_convert_address((struct sockaddr *)&ss, name);
}
static int
@@ -94,6 +98,7 @@ struct key_type key_type_dns_resolver = {
int
dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
{
+ const struct cred *saved_cred;
int rc = -EAGAIN;
struct key *rkey = ERR_PTR(-EAGAIN);
char *name;
@@ -133,8 +138,15 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
goto skip_upcall;
}
+ saved_cred = override_creds(dns_resolver_cache);
rkey = request_key(&key_type_dns_resolver, name, "");
+ revert_creds(saved_cred);
if (!IS_ERR(rkey)) {
+ if (!(rkey->perm & KEY_USR_VIEW)) {
+ down_read(&rkey->sem);
+ rkey->perm |= KEY_USR_VIEW;
+ up_read(&rkey->sem);
+ }
len = rkey->type_data.x[0];
data = rkey->payload.data;
} else {
@@ -165,4 +177,61 @@ out:
return rc;
}
+int __init cifs_init_dns_resolver(void)
+{
+ struct cred *cred;
+ struct key *keyring;
+ int ret;
+
+ printk(KERN_NOTICE "Registering the %s key type\n",
+ key_type_dns_resolver.name);
+
+ /* create an override credential set with a special thread keyring in
+ * which DNS requests are cached
+ *
+ * this is used to prevent malicious redirections from being installed
+ * with add_key().
+ */
+ cred = prepare_kernel_cred(NULL);
+ if (!cred)
+ return -ENOMEM;
+
+ keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ if (IS_ERR(keyring)) {
+ ret = PTR_ERR(keyring);
+ goto failed_put_cred;
+ }
+
+ ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
+ if (ret < 0)
+ goto failed_put_key;
+
+ ret = register_key_type(&key_type_dns_resolver);
+ if (ret < 0)
+ goto failed_put_key;
+
+ /* instruct request_key() to use this special keyring as a cache for
+ * the results it looks up */
+ cred->thread_keyring = keyring;
+ cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+ dns_resolver_cache = cred;
+ return 0;
+
+failed_put_key:
+ key_put(keyring);
+failed_put_cred:
+ put_cred(cred);
+ return ret;
+}
+void cifs_exit_dns_resolver(void)
+{
+ key_revoke(dns_resolver_cache->thread_keyring);
+ unregister_key_type(&key_type_dns_resolver);
+ put_cred(dns_resolver_cache);
+ printk(KERN_NOTICE "Unregistered %s key type\n",
+ key_type_dns_resolver.name);
+}
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 966e9288930..5d7f291df16 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -24,8 +24,8 @@
#define _DNS_RESOLVE_H
#ifdef __KERNEL__
-#include <linux/key-type.h>
-extern struct key_type key_type_dns_resolver;
+extern int __init cifs_init_dns_resolver(void);
+extern void cifs_exit_dns_resolver(void);
extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr);
#endif /* KERNEL */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 75541af4b3d..fa04a00d126 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -40,6 +40,7 @@
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "fscache.h"
static inline int cifs_convert_flags(unsigned int flags)
{
@@ -162,44 +163,12 @@ psx_client_can_cache:
return 0;
}
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
- struct list_head *tmp;
- struct cifsFileInfo *pCifsFile = NULL;
- struct cifsInodeInfo *pCifsInode = NULL;
-
- /* search inode for this file and fill in file->private_data */
- pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- read_lock(&GlobalSMBSeslock);
- list_for_each(tmp, &pCifsInode->openFileList) {
- pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
- if ((pCifsFile->pfile == NULL) &&
- (pCifsFile->pid == current->tgid)) {
- /* mode set in cifs_create */
-
- /* needed for writepage */
- pCifsFile->pfile = file;
- file->private_data = pCifsFile;
- break;
- }
- }
- read_unlock(&GlobalSMBSeslock);
-
- if (file->private_data != NULL) {
- return pCifsFile;
- } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
- cERROR(1, "could not find file instance for "
- "new file %p", file);
- return NULL;
-}
-
/* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
- struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
char *full_path, int xid)
{
+ struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
struct timespec temp;
int rc;
@@ -213,36 +182,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
/* if not oplocked, invalidate inode pages if mtime or file
size changed */
temp = cifs_NTtimeToUnix(buf->LastWriteTime);
- if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
- (file->f_path.dentry->d_inode->i_size ==
+ if (timespec_equal(&inode->i_mtime, &temp) &&
+ (inode->i_size ==
(loff_t)le64_to_cpu(buf->EndOfFile))) {
cFYI(1, "inode unchanged on server");
} else {
- if (file->f_path.dentry->d_inode->i_mapping) {
+ if (inode->i_mapping) {
/* BB no need to lock inode until after invalidate
since namei code should already have it locked? */
- rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+ rc = filemap_write_and_wait(inode->i_mapping);
if (rc != 0)
- CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+ pCifsInode->write_behind_rc = rc;
}
cFYI(1, "invalidating remote inode since open detected it "
"changed");
- invalidate_remote_inode(file->f_path.dentry->d_inode);
+ invalidate_remote_inode(inode);
}
client_can_cache:
if (pTcon->unix_ext)
- rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
- full_path, inode->i_sb, xid);
+ rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+ xid);
else
- rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
- full_path, buf, inode->i_sb, xid, NULL);
+ rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+ xid, NULL);
if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
pCifsInode->clientCanCacheAll = true;
pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p",
- file->f_path.dentry->d_inode);
+ cFYI(1, "Exclusive Oplock granted on inode %p", inode);
} else if ((*oplock & 0xF) == OPLOCK_READ)
pCifsInode->clientCanCacheRead = true;
@@ -256,7 +224,7 @@ int cifs_open(struct inode *inode, struct file *file)
__u32 oplock;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pCifsFile;
+ struct cifsFileInfo *pCifsFile = NULL;
struct cifsInodeInfo *pCifsInode;
char *full_path = NULL;
int desiredAccess;
@@ -270,12 +238,6 @@ int cifs_open(struct inode *inode, struct file *file)
tcon = cifs_sb->tcon;
pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- pCifsFile = cifs_fill_filedata(file);
- if (pCifsFile) {
- rc = 0;
- FreeXid(xid);
- return rc;
- }
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
@@ -299,8 +261,7 @@ int cifs_open(struct inode *inode, struct file *file)
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
oflags |= SMB_O_CREAT;
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, &inode, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -308,9 +269,23 @@ int cifs_open(struct inode *inode, struct file *file)
/* no need for special case handling of setting mode
on read only files needed here */
- pCifsFile = cifs_fill_filedata(file);
- cifs_posix_open_inode_helper(inode, file, pCifsInode,
- oplock, netfid);
+ rc = cifs_posix_open_inode_helper(inode, file,
+ pCifsInode, oplock, netfid);
+ if (rc != 0) {
+ CIFSSMBClose(xid, tcon, netfid);
+ goto out;
+ }
+
+ pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+ file->f_path.mnt,
+ oflags);
+ if (pCifsFile == NULL) {
+ CIFSSMBClose(xid, tcon, netfid);
+ rc = -ENOMEM;
+ }
+
+ cifs_fscache_set_inode_cookie(inode, file);
+
goto out;
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
@@ -391,16 +366,18 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
+ rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+ if (rc != 0)
+ goto out;
+
pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
file->f_flags);
- file->private_data = pCifsFile;
- if (file->private_data == NULL) {
+ if (pCifsFile == NULL) {
rc = -ENOMEM;
goto out;
}
- rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
- &oplock, buf, full_path, xid);
+ cifs_fscache_set_inode_cookie(inode, file);
if (oplock & CIFS_CREATE_ACTION) {
/* time to set mode which we can not set earlier due to
@@ -456,7 +433,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
__u16 netfid;
if (file->private_data)
- pCifsFile = (struct cifsFileInfo *)file->private_data;
+ pCifsFile = file->private_data;
else
return -EBADF;
@@ -513,8 +490,7 @@ reopen_error_exit:
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, NULL, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -595,8 +571,7 @@ int cifs_close(struct inode *inode, struct file *file)
int xid, timeout;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
- struct cifsFileInfo *pSMBFile =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *pSMBFile = file->private_data;
xid = GetXid();
@@ -671,8 +646,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
{
int rc = 0;
int xid;
- struct cifsFileInfo *pCFileStruct =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *pCFileStruct = file->private_data;
char *ptmp;
cFYI(1, "Closedir inode = 0x%p", inode);
@@ -893,8 +867,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
length, pfLock,
posix_lock_type, wait_flag);
} else {
- struct cifsFileInfo *fid =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *fid = file->private_data;
if (numLock) {
rc = CIFSSMBLock(xid, tcon, netfid, length,
@@ -995,7 +968,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
if (file->private_data == NULL)
return -EBADF;
- open_file = (struct cifsFileInfo *) file->private_data;
+ open_file = file->private_data;
rc = generic_write_checks(file, poffset, &write_size, 0);
if (rc)
@@ -1097,7 +1070,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
if (file->private_data == NULL)
return -EBADF;
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
xid = GetXid();
@@ -1681,8 +1654,7 @@ int cifs_fsync(struct file *file, int datasync)
int xid;
int rc = 0;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *smbfile =
- (struct cifsFileInfo *)file->private_data;
+ struct cifsFileInfo *smbfile = file->private_data;
struct inode *inode = file->f_path.dentry->d_inode;
xid = GetXid();
@@ -1786,7 +1758,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
if ((file->f_flags & O_ACCMODE) == O_WRONLY)
cFYI(1, "attempting read on write only file instance");
@@ -1867,7 +1839,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
if ((file->f_flags & O_ACCMODE) == O_WRONLY)
cFYI(1, "attempting read on write only file instance");
@@ -1972,6 +1944,9 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
SetPageUptodate(page);
unlock_page(page);
data += PAGE_CACHE_SIZE;
+
+ /* add page to FS-Cache */
+ cifs_readpage_to_fscache(mapping->host, page);
}
return;
}
@@ -1998,10 +1973,19 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
FreeXid(xid);
return rc;
}
- open_file = (struct cifsFileInfo *)file->private_data;
+ open_file = file->private_data;
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
pTcon = cifs_sb->tcon;
+ /*
+ * Reads as many pages as possible from fscache. Returns -ENOBUFS
+ * immediately if the cookie is negative
+ */
+ rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
+ &num_pages);
+ if (rc == 0)
+ goto read_complete;
+
cFYI(DBG2, "rpages: num pages %d", num_pages);
for (i = 0; i < num_pages; ) {
unsigned contig_pages;
@@ -2112,6 +2096,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
smb_read_data = NULL;
}
+read_complete:
FreeXid(xid);
return rc;
}
@@ -2122,6 +2107,11 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
char *read_data;
int rc;
+ /* Is the page cached? */
+ rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
+ if (rc == 0)
+ goto read_complete;
+
page_cache_get(page);
read_data = kmap(page);
/* for reads over a certain size could initiate async read ahead */
@@ -2141,11 +2131,17 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
flush_dcache_page(page);
SetPageUptodate(page);
+
+ /* send this page to the cache */
+ cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
+
rc = 0;
io_error:
kunmap(page);
page_cache_release(page);
+
+read_complete:
return rc;
}
@@ -2295,6 +2291,22 @@ out:
return rc;
}
+static int cifs_release_page(struct page *page, gfp_t gfp)
+{
+ if (PagePrivate(page))
+ return 0;
+
+ return cifs_fscache_release_page(page, gfp);
+}
+
+static void cifs_invalidate_page(struct page *page, unsigned long offset)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
+
+ if (offset == 0)
+ cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
+}
+
static void
cifs_oplock_break(struct slow_work *work)
{
@@ -2368,6 +2380,8 @@ const struct address_space_operations cifs_addr_ops = {
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
+ .releasepage = cifs_release_page,
+ .invalidatepage = cifs_invalidate_page,
/* .sync_page = cifs_sync_page, */
/* .direct_IO = */
};
@@ -2384,6 +2398,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.write_begin = cifs_write_begin,
.write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
+ .releasepage = cifs_release_page,
+ .invalidatepage = cifs_invalidate_page,
/* .sync_page = cifs_sync_page, */
/* .direct_IO = */
};
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
new file mode 100644
index 00000000000..9f3f5c4be16
--- /dev/null
+++ b/fs/cifs/fscache.c
@@ -0,0 +1,236 @@
+/*
+ * fs/cifs/fscache.c - CIFS filesystem cache interface
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Author(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "fscache.h"
+#include "cifsglob.h"
+#include "cifs_debug.h"
+#include "cifs_fs_sb.h"
+
+void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
+{
+ server->fscache =
+ fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
+ &cifs_fscache_server_index_def, server);
+ cFYI(1, "CIFS: get client cookie (0x%p/0x%p)", server,
+ server->fscache);
+}
+
+void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
+{
+ cFYI(1, "CIFS: release client cookie (0x%p/0x%p)", server,
+ server->fscache);
+ fscache_relinquish_cookie(server->fscache, 0);
+ server->fscache = NULL;
+}
+
+void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon)
+{
+ struct TCP_Server_Info *server = tcon->ses->server;
+
+ tcon->fscache =
+ fscache_acquire_cookie(server->fscache,
+ &cifs_fscache_super_index_def, tcon);
+ cFYI(1, "CIFS: get superblock cookie (0x%p/0x%p)",
+ server->fscache, tcon->fscache);
+}
+
+void cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon)
+{
+ cFYI(1, "CIFS: releasing superblock cookie (0x%p)", tcon->fscache);
+ fscache_relinquish_cookie(tcon->fscache, 0);
+ tcon->fscache = NULL;
+}
+
+static void cifs_fscache_enable_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifsi->fscache)
+ return;
+
+ cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ cifsi);
+ cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)",
+ cifs_sb->tcon->fscache, cifsi->fscache);
+}
+
+void cifs_fscache_release_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ if (cifsi->fscache) {
+ cFYI(1, "CIFS releasing inode cookie (0x%p)",
+ cifsi->fscache);
+ fscache_relinquish_cookie(cifsi->fscache, 0);
+ cifsi->fscache = NULL;
+ }
+}
+
+static void cifs_fscache_disable_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ if (cifsi->fscache) {
+ cFYI(1, "CIFS disabling inode cookie (0x%p)",
+ cifsi->fscache);
+ fscache_relinquish_cookie(cifsi->fscache, 1);
+ cifsi->fscache = NULL;
+ }
+}
+
+void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
+{
+ if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+ cifs_fscache_disable_inode_cookie(inode);
+ else {
+ cifs_fscache_enable_inode_cookie(inode);
+ cFYI(1, "CIFS: fscache inode cookie set");
+ }
+}
+
+void cifs_fscache_reset_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct fscache_cookie *old = cifsi->fscache;
+
+ if (cifsi->fscache) {
+ /* retire the current fscache cache and get a new one */
+ fscache_relinquish_cookie(cifsi->fscache, 1);
+
+ cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ cifsi);
+ cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p",
+ cifsi->fscache, old);
+ }
+}
+
+int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ if (PageFsCache(page)) {
+ struct inode *inode = page->mapping->host;
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ cFYI(1, "CIFS: fscache release page (0x%p/0x%p)",
+ page, cifsi->fscache);
+ if (!fscache_maybe_release_page(cifsi->fscache, page, gfp))
+ return 0;
+ }
+
+ return 1;
+}
+
+static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx,
+ int error)
+{
+ cFYI(1, "CFS: readpage_from_fscache_complete (0x%p/%d)",
+ page, error);
+ if (!error)
+ SetPageUptodate(page);
+ unlock_page(page);
+}
+
+/*
+ * Retrieve a page from FS-Cache
+ */
+int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ int ret;
+
+ cFYI(1, "CIFS: readpage_from_fscache(fsc:%p, p:%p, i:0x%p",
+ CIFS_I(inode)->fscache, page, inode);
+ ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page,
+ cifs_readpage_from_fscache_complete,
+ NULL,
+ GFP_KERNEL);
+ switch (ret) {
+
+ case 0: /* page found in fscache, read submitted */
+ cFYI(1, "CIFS: readpage_from_fscache: submitted");
+ return ret;
+ case -ENOBUFS: /* page won't be cached */
+ case -ENODATA: /* page not in cache */
+ cFYI(1, "CIFS: readpage_from_fscache %d", ret);
+ return 1;
+
+ default:
+ cERROR(1, "unknown error ret = %d", ret);
+ }
+ return ret;
+}
+
+/*
+ * Retrieve a set of pages from FS-Cache
+ */
+int __cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ int ret;
+
+ cFYI(1, "CIFS: __cifs_readpages_from_fscache (0x%p/%u/0x%p)",
+ CIFS_I(inode)->fscache, *nr_pages, inode);
+ ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping,
+ pages, nr_pages,
+ cifs_readpage_from_fscache_complete,
+ NULL,
+ mapping_gfp_mask(mapping));
+ switch (ret) {
+ case 0: /* read submitted to the cache for all pages */
+ cFYI(1, "CIFS: readpages_from_fscache: submitted");
+ return ret;
+
+ case -ENOBUFS: /* some pages are not cached and can't be */
+ case -ENODATA: /* some pages are not cached */
+ cFYI(1, "CIFS: readpages_from_fscache: no page");
+ return 1;
+
+ default:
+ cFYI(1, "unknown error ret = %d", ret);
+ }
+
+ return ret;
+}
+
+void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+ int ret;
+
+ cFYI(1, "CIFS: readpage_to_fscache(fsc: %p, p: %p, i: %p",
+ CIFS_I(inode)->fscache, page, inode);
+ ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
+ if (ret != 0)
+ fscache_uncache_page(CIFS_I(inode)->fscache, page);
+}
+
+void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct fscache_cookie *cookie = cifsi->fscache;
+
+ cFYI(1, "CIFS: fscache invalidatepage (0x%p/0x%p)", page, cookie);
+ fscache_wait_on_page_write(cookie, page);
+ fscache_uncache_page(cookie, page);
+}
+
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
new file mode 100644
index 00000000000..31b88ec2341
--- /dev/null
+++ b/fs/cifs/fscache.h
@@ -0,0 +1,136 @@
+/*
+ * fs/cifs/fscache.h - CIFS filesystem cache interface definitions
+ *
+ * Copyright (c) 2010 Novell, Inc.
+ * Authors(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _CIFS_FSCACHE_H
+#define _CIFS_FSCACHE_H
+
+#include <linux/fscache.h>
+
+#include "cifsglob.h"
+
+#ifdef CONFIG_CIFS_FSCACHE
+
+extern struct fscache_netfs cifs_fscache_netfs;
+extern const struct fscache_cookie_def cifs_fscache_server_index_def;
+extern const struct fscache_cookie_def cifs_fscache_super_index_def;
+extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
+
+extern int cifs_fscache_register(void);
+extern void cifs_fscache_unregister(void);
+
+/*
+ * fscache.c
+ */
+extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *);
+extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *);
+extern void cifs_fscache_get_super_cookie(struct cifsTconInfo *);
+extern void cifs_fscache_release_super_cookie(struct cifsTconInfo *);
+
+extern void cifs_fscache_release_inode_cookie(struct inode *);
+extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
+extern void cifs_fscache_reset_inode_cookie(struct inode *);
+
+extern void __cifs_fscache_invalidate_page(struct page *, struct inode *);
+extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
+extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
+extern int __cifs_readpages_from_fscache(struct inode *,
+ struct address_space *,
+ struct list_head *,
+ unsigned *);
+
+extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
+
+static inline void cifs_fscache_invalidate_page(struct page *page,
+ struct inode *inode)
+{
+ if (PageFsCache(page))
+ __cifs_fscache_invalidate_page(page, inode);
+}
+
+static inline int cifs_readpage_from_fscache(struct inode *inode,
+ struct page *page)
+{
+ if (CIFS_I(inode)->fscache)
+ return __cifs_readpage_from_fscache(inode, page);
+
+ return -ENOBUFS;
+}
+
+static inline int cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ if (CIFS_I(inode)->fscache)
+ return __cifs_readpages_from_fscache(inode, mapping, pages,
+ nr_pages);
+ return -ENOBUFS;
+}
+
+static inline void cifs_readpage_to_fscache(struct inode *inode,
+ struct page *page)
+{
+ if (PageFsCache(page))
+ __cifs_readpage_to_fscache(inode, page);
+}
+
+#else /* CONFIG_CIFS_FSCACHE */
+static inline int cifs_fscache_register(void) { return 0; }
+static inline void cifs_fscache_unregister(void) {}
+
+static inline void
+cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {}
+static inline void
+cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {}
+static inline void cifs_fscache_get_super_cookie(struct cifsTconInfo *tcon) {}
+static inline void
+cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) {}
+
+static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
+static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
+ struct file *filp) {}
+static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
+static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp)
+{
+ return 1; /* May release page */
+}
+
+static inline void cifs_fscache_invalidate_page(struct page *page,
+ struct inode *inode) {}
+static inline int
+cifs_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+ return -ENOBUFS;
+}
+
+static inline int cifs_readpages_from_fscache(struct inode *inode,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned *nr_pages)
+{
+ return -ENOBUFS;
+}
+
+static inline void cifs_readpage_to_fscache(struct inode *inode,
+ struct page *page) {}
+
+#endif /* CONFIG_CIFS_FSCACHE */
+
+#endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a5..a15b3a9bbff 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -29,6 +29,7 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "fscache.h"
static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
@@ -288,7 +289,7 @@ int cifs_get_file_info_unix(struct file *filp)
struct inode *inode = filp->f_path.dentry->d_inode;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsTconInfo *tcon = cifs_sb->tcon;
- struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
+ struct cifsFileInfo *cfile = filp->private_data;
xid = GetXid();
rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -515,7 +516,7 @@ int cifs_get_file_info(struct file *filp)
struct inode *inode = filp->f_path.dentry->d_inode;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsTconInfo *tcon = cifs_sb->tcon;
- struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
+ struct cifsFileInfo *cfile = filp->private_data;
xid = GetXid();
rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
@@ -723,9 +724,14 @@ cifs_find_inode(struct inode *inode, void *opaque)
{
struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
+ /* don't match inode with different uniqueid */
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
+ /* don't match inode of different type */
+ if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
+ return 0;
+
/*
* uh oh -- it's a directory. We can't use it since hardlinked dirs are
* verboten. Disable serverino and return it as if it were found, the
@@ -776,6 +782,10 @@ retry_iget5_locked:
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
+#ifdef CONFIG_CIFS_FSCACHE
+ /* initialize per-inode cache cookie pointer */
+ CIFS_I(inode)->fscache = NULL;
+#endif
unlock_new_inode(inode);
}
}
@@ -807,6 +817,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
if (!inode)
return ERR_PTR(-ENOMEM);
+#ifdef CONFIG_CIFS_FSCACHE
+ /* populate tcon->resource_id */
+ cifs_sb->tcon->resource_id = CIFS_I(inode)->uniqueid;
+#endif
+
if (rc && cifs_sb->tcon->ipc) {
cFYI(1, "ipc connection - fake read inode");
inode->i_mode |= S_IFDIR;
@@ -1401,6 +1416,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
if (rc == 0 || rc != -ETXTBSY)
return rc;
+ /* open-file renames don't work across directories */
+ if (to_dentry->d_parent != from_dentry->d_parent)
+ return rc;
+
/* open the file to be renamed -- we need DELETE perms */
rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
CREATE_NOT_DIR, &srcfid, &oplock, NULL,
@@ -1564,6 +1583,7 @@ cifs_invalidate_mapping(struct inode *inode)
cifs_i->write_behind_rc = rc;
}
invalidate_remote_inode(inode);
+ cifs_fscache_reset_inode_cookie(inode);
}
int cifs_revalidate_file(struct file *filp)
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 505926f1ee6..9d38a71c8e1 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -41,8 +41,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
__u64 ExtAttrMask = 0;
__u64 caps;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pSMBFile =
- (struct cifsFileInfo *)filep->private_data;
+ struct cifsFileInfo *pSMBFile = filep->private_data;
#endif /* CONFIG_CIFS_POSIX */
xid = GetXid();
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index d35d52889cb..c6721ee26db 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -61,6 +61,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
{ERRremcd, -EACCES},
{ERRdiffdevice, -EXDEV},
{ERRnofiles, -ENOENT},
+ {ERRwriteprot, -EROFS},
{ERRbadshare, -ETXTBSY},
{ERRlock, -EACCES},
{ERRunsup, -EINVAL},
@@ -164,7 +165,7 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
* Returns 0 on failure.
*/
int
-cifs_convert_address(char *src, void *dst)
+cifs_convert_address(struct sockaddr *dst, char *src)
{
int rc;
char *pct, *endp;
@@ -201,6 +202,27 @@ cifs_convert_address(char *src, void *dst)
return rc;
}
+int
+cifs_fill_sockaddr(struct sockaddr *dst, char *src,
+ const unsigned short int port)
+{
+ if (!cifs_convert_address(dst, src))
+ return 0;
+
+ switch (dst->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)dst)->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)dst)->sin6_port = htons(port);
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
/*****************************************************************************
convert a NT status code to a dos class/code
*****************************************************************************/
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index daf1753af67..d5e591fab47 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -847,6 +847,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
+ if (tmp_buf == NULL) {
+ rc = -ENOMEM;
+ break;
+ }
+
for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
if (current_entry == NULL) {
/* evaluate whether this case is an error */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2..0a57cb7db5d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
/* calculate session key */
setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
- if (first_time) /* should this be moved into common code
- with similar ntlmv2 path? */
- /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
- response BB FIXME, v2_sess_key); */
-
- /* copy session key */
-
- /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
- bcc_ptr += LM2_SESS_KEY_SIZE; */
+ /* FIXME: calculate MAC key */
memcpy(bcc_ptr, (char *)v2_sess_key,
sizeof(struct ntlmv2_resp));
bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index c5084d27db7..7f16cb825fe 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -76,6 +76,7 @@
#define ERRnofiles 18 /* A File Search command can find no
more files matching the specified
criteria. */
+#define ERRwriteprot 19 /* media is write protected */
#define ERRgeneral 31
#define ERRbadshare 32 /* The sharing mode specified for an
Open conflicts with existing FIDs on
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a63..86d4db15473 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
up_read(&sb->s_umount);
}
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
count -= pruned;
__put_super(sb);
/* more work left to do? */
@@ -894,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aacf531..a10cb91cade 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
{
ssize_t transferred = 0;
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
transferred = dio->i_size - offset;
}
- if (dio->end_io && dio->result)
- dio->end_io(dio->iocb, offset, transferred,
- dio->map_bh.b_private);
-
- if (dio->flags & DIO_LOCKING)
- /* lockdep: non-owner release */
- up_read_non_owner(&dio->inode->i_alloc_sem);
-
if (ret == 0)
ret = dio->page_errors;
if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
if (ret == 0)
ret = transferred;
+ if (dio->end_io && dio->result) {
+ dio->end_io(dio->iocb, offset, transferred,
+ dio->map_bh.b_private, ret, is_async);
+ } else if (is_async) {
+ aio_complete(dio->iocb, ret, 0);
+ }
+
+ if (dio->flags & DIO_LOCKING)
+ /* lockdep: non-owner release */
+ up_read_non_owner(&dio->inode->i_alloc_sem);
+
return ret;
}
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
- aio_complete(dio->iocb, ret, 0);
+ dio_complete(dio, dio->iocb->ki_pos, 0, true);
kfree(dio);
}
}
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (ret2 == 0) {
- ret = dio_complete(dio, offset, ret);
+ ret = dio_complete(dio, offset, ret, false);
kfree(dio);
} else
BUG_ON(ret != -EIOCBQUEUED);
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 2d8dbce9d48..46c4dd8dfcc 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -31,9 +31,9 @@ static struct mutex ecryptfs_msg_ctx_lists_mux;
static struct hlist_head *ecryptfs_daemon_hash;
struct mutex ecryptfs_daemon_hash_mux;
-static int ecryptfs_hash_buckets;
+static int ecryptfs_hash_bits;
#define ecryptfs_uid_hash(uid) \
- hash_long((unsigned long)uid, ecryptfs_hash_buckets)
+ hash_long((unsigned long)uid, ecryptfs_hash_bits)
static u32 ecryptfs_msg_counter;
static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
@@ -486,18 +486,19 @@ int ecryptfs_init_messaging(void)
}
mutex_init(&ecryptfs_daemon_hash_mux);
mutex_lock(&ecryptfs_daemon_hash_mux);
- ecryptfs_hash_buckets = 1;
- while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
- ecryptfs_hash_buckets++;
+ ecryptfs_hash_bits = 1;
+ while (ecryptfs_number_of_users >> ecryptfs_hash_bits)
+ ecryptfs_hash_bits++;
ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
- * ecryptfs_hash_buckets), GFP_KERNEL);
+ * (1 << ecryptfs_hash_bits)),
+ GFP_KERNEL);
if (!ecryptfs_daemon_hash) {
rc = -ENOMEM;
printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
mutex_unlock(&ecryptfs_daemon_hash_mux);
goto out;
}
- for (i = 0; i < ecryptfs_hash_buckets; i++)
+ for (i = 0; i < (1 << ecryptfs_hash_bits); i++)
INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
mutex_unlock(&ecryptfs_daemon_hash_mux);
ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
@@ -554,7 +555,7 @@ void ecryptfs_release_messaging(void)
int i;
mutex_lock(&ecryptfs_daemon_hash_mux);
- for (i = 0; i < ecryptfs_hash_buckets; i++) {
+ for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
int rc;
hlist_for_each_entry(daemon, elem,
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98..2bcc0431bad 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3..8a11fe21218 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d67955..0afc8c1d8cf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
}
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
- ssize_t size, void *private)
+ ssize_t size, void *private, int ret,
+ bool is_async)
{
ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq;
@@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
/* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end || !size)
- return;
+ goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p"
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end);
iocb->private = NULL;
- return;
+ goto out;
}
io_end->offset = offset;
@@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
iocb->private = NULL;
+out:
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 51e11bf5708..9d175d623aa 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -733,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
+ unsigned long flags;
+
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- spin_lock(&fa->fa_lock);
+ spin_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -747,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- spin_unlock(&fa->fa_lock);
+ spin_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f48bc..d5be1693ac9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,51 +38,18 @@ int nr_pdflush_threads;
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
-struct wb_writeback_args {
+struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
enum writeback_sync_modes sync_mode;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
-};
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
struct list_head list; /* pending work list */
- struct rcu_head rcu_head; /* for RCU free/clear of work */
-
- unsigned long seen; /* threads that have seen this work */
- atomic_t pending; /* number of threads still to do work */
-
- struct wb_writeback_args args; /* writeback arguments */
-
- unsigned long state; /* flag bits, see WS_* */
+ struct completion *done; /* set if the caller waits */
};
-enum {
- WS_USED_B = 0,
- WS_ONSTACK_B,
-};
-
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
- return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void bdi_work_init(struct bdi_work *work,
- struct wb_writeback_args *args)
-{
- INIT_RCU_HEAD(&work->rcu_head);
- work->args = *args;
- work->state = WS_USED;
-}
-
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -95,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return !list_empty(&bdi->work_list);
}
-static void bdi_work_clear(struct bdi_work *work)
-{
- clear_bit(WS_USED_B, &work->state);
- smp_mb__after_clear_bit();
- /*
- * work can have disappeared at this point. bit waitq functions
- * should be able to tolerate this, provided bdi_sched_wait does
- * not dereference it's pointer argument.
- */
- wake_up_bit(&work->state, WS_USED_B);
-}
-
-static void bdi_work_free(struct rcu_head *head)
-{
- struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
- if (!bdi_work_on_stack(work))
- kfree(work);
- else
- bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
- const enum writeback_sync_modes sync_mode = work->args.sync_mode;
- int onstack = bdi_work_on_stack(work);
-
- /*
- * For allocated work, we can clear the done/seen bit right here.
- * For on-stack work, we need to postpone both the clear and free
- * to after the RCU grace period, since the stack could be invalidated
- * as soon as bdi_work_clear() has done the wakeup.
- */
- if (!onstack)
- bdi_work_clear(work);
- if (sync_mode == WB_SYNC_NONE || onstack)
- call_rcu(&work->rcu_head, bdi_work_free);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
- /*
- * The caller has retrieved the work arguments from this work,
- * drop our reference. If this is the last ref, delete and free it
- */
- if (atomic_dec_and_test(&work->pending)) {
- struct backing_dev_info *bdi = wb->bdi;
-
- spin_lock(&bdi->wb_lock);
- list_del_rcu(&work->list);
- spin_unlock(&bdi->wb_lock);
-
- wb_work_complete(work);
- }
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+ struct wb_writeback_work *work)
{
- work->seen = bdi->wb_mask;
- BUG_ON(!work->seen);
- atomic_set(&work->pending, bdi->wb_cnt);
- BUG_ON(!bdi->wb_cnt);
-
- /*
- * list_add_tail_rcu() contains the necessary barriers to
- * make sure the above stores are seen before the item is
- * noticed on the list
- */
spin_lock(&bdi->wb_lock);
- list_add_tail_rcu(&work->list, &bdi->work_list);
+ list_add_tail(&work->list, &bdi->work_list);
spin_unlock(&bdi->wb_lock);
/*
@@ -181,97 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
}
}
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
-{
- wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
- TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_args *args)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+ bool range_cyclic, bool for_background)
{
- struct bdi_work *work;
+ struct wb_writeback_work *work;
/*
* This is WB_SYNC_NONE writeback, so if allocation fails just
* wakeup the thread for old dirty data writeback
*/
- work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (work) {
- bdi_work_init(work, args);
- bdi_queue_work(bdi, work);
- } else {
- struct bdi_writeback *wb = &bdi->wb;
-
- if (wb->task)
- wake_up_process(wb->task);
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ if (bdi->wb.task)
+ wake_up_process(bdi->wb.task);
+ return;
}
+
+ work->sync_mode = WB_SYNC_NONE;
+ work->nr_pages = nr_pages;
+ work->range_cyclic = range_cyclic;
+ work->for_background = for_background;
+
+ bdi_queue_work(bdi, work);
}
/**
- * bdi_sync_writeback - start and wait for writeback
+ * bdi_start_writeback - start writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
+ * @nr_pages: the number of pages to write
*
* Description:
- * This does WB_SYNC_ALL data integrity writeback and waits for the
- * IO to complete. Callers must hold the sb s_umount semaphore for
- * reading, to avoid having the super disappear before we are done.
+ * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * started when this function returns, we make no guarentees on
+ * completion. Caller need not hold sb s_umount semaphore.
+ *
*/
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
- struct super_block *sb)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_ALL,
- .nr_pages = LONG_MAX,
- .range_cyclic = 0,
- };
- struct bdi_work work;
-
- bdi_work_init(&work, &args);
- work.state |= WS_ONSTACK;
-
- bdi_queue_work(bdi, &work);
- bdi_wait_on_work_clear(&work);
+ __bdi_start_writeback(bdi, nr_pages, true, false);
}
/**
- * bdi_start_writeback - start writeback
+ * bdi_start_background_writeback - start background writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
- * @nr_pages: the number of pages to write
*
* Description:
- * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * This does WB_SYNC_NONE background writeback. The IO is only
* started when this function returns, we make no guarentees on
* completion. Caller need not hold sb s_umount semaphore.
- *
*/
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages)
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_NONE,
- .nr_pages = nr_pages,
- .range_cyclic = 1,
- };
-
- /*
- * We treat @nr_pages=0 as the special case to do background writeback,
- * ie. to sync pages until the background dirty threshold is reached.
- */
- if (!nr_pages) {
- args.nr_pages = LONG_MAX;
- args.for_background = 1;
- }
-
- bdi_alloc_queue_work(bdi, &args);
+ __bdi_start_writeback(bdi, LONG_MAX, true, true);
}
/*
@@ -561,75 +425,69 @@ select_queue:
return ret;
}
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
- up_read(&sb->s_umount);
- put_super(sb);
-}
-
-enum sb_pin_state {
- SB_PINNED,
- SB_NOT_PINNED,
- SB_PIN_FAILED
-};
-
/*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
* before calling writeback. So make sure that we do pin it, so it doesn't
* go away while we are writing inodes from it.
*/
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
- struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
{
- /*
- * Caller must already hold the ref for this
- */
- if (wbc->sync_mode == WB_SYNC_ALL) {
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
- return SB_NOT_PINNED;
- }
spin_lock(&sb_lock);
+ if (list_empty(&sb->s_instances)) {
+ spin_unlock(&sb_lock);
+ return false;
+ }
+
sb->s_count++;
+ spin_unlock(&sb_lock);
+
if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root) {
- spin_unlock(&sb_lock);
- return SB_PINNED;
- }
- /*
- * umounted, drop rwsem again and fall through to failure
- */
+ if (sb->s_root)
+ return true;
up_read(&sb->s_umount);
}
- sb->s_count--;
- spin_unlock(&sb_lock);
- return SB_PIN_FAILED;
+
+ put_super(sb);
+ return false;
}
/*
* Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
+ *
* Return 1, if the caller writeback routine should be
* interrupted. Otherwise return 0.
*/
-static int writeback_sb_inodes(struct super_block *sb,
- struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+ struct writeback_control *wbc, bool only_this_sb)
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
- if (wbc->sb && sb != inode->i_sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- if (sb != inode->i_sb)
- /* finish with this superblock */
+
+ if (inode->i_sb != sb) {
+ if (only_this_sb) {
+ /*
+ * We only want to write back data for this
+ * superblock, move all inodes not belonging
+ * to it back onto the dirty list.
+ */
+ redirty_tail(inode);
+ continue;
+ }
+
+ /*
+ * The inode belongs to a different superblock.
+ * Bounce back to the caller to unpin this and
+ * pin the next superblock.
+ */
return 0;
+ }
+
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
continue;
@@ -667,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
return 1;
}
-static void writeback_inodes_wb(struct bdi_writeback *wb,
- struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
int ret = 0;
@@ -681,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
struct super_block *sb = inode->i_sb;
- enum sb_pin_state state;
- if (wbc->sb && sb != wbc->sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- state = pin_sb_for_writeback(wbc, sb);
-
- if (state == SB_PIN_FAILED) {
+ if (!pin_sb_for_writeback(sb)) {
requeue_io(inode);
continue;
}
- ret = writeback_sb_inodes(sb, wb, wbc);
+ ret = writeback_sb_inodes(sb, wb, wbc, false);
+ drop_super(sb);
- if (state == SB_PINNED)
- unpin_sb_for_writeback(sb);
if (ret)
break;
}
@@ -706,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
/* Leave any unwritten inodes on b_io */
}
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+ struct bdi_writeback *wb, struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = wbc->bdi;
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- writeback_inodes_wb(&bdi->wb, wbc);
+ wbc->wb_start = jiffies; /* livelock avoidance */
+ spin_lock(&inode_lock);
+ if (!wbc->for_kupdate || list_empty(&wb->b_io))
+ queue_io(wb, wbc->older_than_this);
+ writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&inode_lock);
}
/*
@@ -748,16 +602,14 @@ static inline bool over_bground_thresh(void)
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
- struct wb_writeback_args *args)
+ struct wb_writeback_work *work)
{
struct writeback_control wbc = {
- .bdi = wb->bdi,
- .sb = args->sb,
- .sync_mode = args->sync_mode,
+ .sync_mode = work->sync_mode,
.older_than_this = NULL,
- .for_kupdate = args->for_kupdate,
- .for_background = args->for_background,
- .range_cyclic = args->range_cyclic,
+ .for_kupdate = work->for_kupdate,
+ .for_background = work->for_background,
+ .range_cyclic = work->range_cyclic,
};
unsigned long oldest_jif;
long wrote = 0;
@@ -777,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
/*
* Stop writeback when nr_pages has been consumed
*/
- if (args->nr_pages <= 0)
+ if (work->nr_pages <= 0)
break;
/*
* For background writeout, stop when we are below the
* background dirty threshold
*/
- if (args->for_background && !over_bground_thresh())
+ if (work->for_background && !over_bground_thresh())
break;
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
- writeback_inodes_wb(wb, &wbc);
- args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ if (work->sb)
+ __writeback_inodes_sb(work->sb, wb, &wbc);
+ else
+ writeback_inodes_wb(wb, &wbc);
+ work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
/*
@@ -827,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
}
/*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
*/
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
- struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
{
- struct bdi_work *work, *ret = NULL;
+ struct wb_writeback_work *work = NULL;
- rcu_read_lock();
-
- list_for_each_entry_rcu(work, &bdi->work_list, list) {
- if (!test_bit(wb->nr, &work->seen))
- continue;
- clear_bit(wb->nr, &work->seen);
-
- ret = work;
- break;
+ spin_lock(&bdi->wb_lock);
+ if (!list_empty(&bdi->work_list)) {
+ work = list_entry(bdi->work_list.next,
+ struct wb_writeback_work, list);
+ list_del_init(&work->list);
}
-
- rcu_read_unlock();
- return ret;
+ spin_unlock(&bdi->wb_lock);
+ return work;
}
static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -876,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
if (nr_pages) {
- struct wb_writeback_args args = {
+ struct wb_writeback_work work = {
.nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
.for_kupdate = 1,
.range_cyclic = 1,
};
- return wb_writeback(wb, &args);
+ return wb_writeback(wb, &work);
}
return 0;
@@ -895,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
{
struct backing_dev_info *bdi = wb->bdi;
- struct bdi_work *work;
+ struct wb_writeback_work *work;
long wrote = 0;
while ((work = get_next_work_item(bdi, wb)) != NULL) {
- struct wb_writeback_args args = work->args;
-
/*
* Override sync mode, in case we must wait for completion
+ * because this thread is exiting now.
*/
if (force_wait)
- work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
-
- /*
- * If this isn't a data integrity operation, just notify
- * that we have seen this work and we are now starting it.
- */
- if (args.sync_mode == WB_SYNC_NONE)
- wb_clear_pending(wb, work);
+ work->sync_mode = WB_SYNC_ALL;
- wrote += wb_writeback(wb, &args);
+ wrote += wb_writeback(wb, work);
/*
- * This is a data integrity writeback, so only do the
- * notification when we have completed the work.
+ * Notify the caller of completion if this is a synchronous
+ * work item, otherwise just free it.
*/
- if (args.sync_mode == WB_SYNC_ALL)
- wb_clear_pending(wb, work);
+ if (work->done)
+ complete(work->done);
+ else
+ kfree(work);
}
/*
@@ -978,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
}
/*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
*/
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .nr_pages = nr_pages,
- .sync_mode = WB_SYNC_NONE,
- };
struct backing_dev_info *bdi;
- rcu_read_lock();
+ if (!nr_pages) {
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ }
+ rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
-
- bdi_alloc_queue_work(bdi, &args);
+ __bdi_start_writeback(bdi, nr_pages, false, false);
}
-
rcu_read_unlock();
}
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
- if (nr_pages == 0)
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS);
- bdi_writeback_all(NULL, nr_pages);
-}
-
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1218,12 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
{
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- long nr_to_write;
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
+ .done = &done,
+ };
+
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- nr_to_write = nr_dirty + nr_unstable +
+ work.nr_pages = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
}
EXPORT_SYMBOL(writeback_inodes_sb);
@@ -1237,7 +1069,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
int writeback_inodes_sb_if_idle(struct super_block *sb)
{
if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
writeback_inodes_sb(sb);
+ up_read(&sb->s_umount);
return 1;
} else
return 0;
@@ -1253,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
*/
void sync_inodes_sb(struct super_block *sb)
{
- bdi_sync_writeback(sb->s_bdi, sb);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_ALL,
+ .nr_pages = LONG_MAX,
+ .range_cyclic = 0,
+ .done = &done,
+ };
+
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
+
wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 9f8b52500d6..5e96cbd8a45 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -136,10 +136,7 @@ static int gfs2_writeback_writepage(struct page *page,
if (ret <= 0)
return ret;
- ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc);
- if (ret == -EAGAIN)
- ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
- return ret;
+ return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
}
/**
@@ -637,9 +634,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
}
}
- error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
- if (error)
- goto out_unlock;
+ alloc_required = gfs2_write_alloc_required(ip, pos, len);
if (alloc_required || gfs2_is_jdata(ip))
gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4a48c0f4b40..6f482809d1a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1040,7 +1040,8 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
goto out;
if (gfs2_is_stuffed(ip)) {
- u64 dsize = size + sizeof(struct gfs2_inode);
+ u64 dsize = size + sizeof(struct gfs2_dinode);
+ ip->i_disksize = size;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1243,13 +1244,12 @@ int gfs2_file_dealloc(struct gfs2_inode *ip)
* @ip: the file being written to
* @offset: the offset to write to
* @len: the number of bytes being written
- * @alloc_required: set to 1 if an alloc is required, 0 otherwise
*
- * Returns: errno
+ * Returns: 1 if an alloc is required, 0 otherwise
*/
int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
- unsigned int len, int *alloc_required)
+ unsigned int len)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head bh;
@@ -1257,26 +1257,23 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
u64 lblock, lblock_stop, size;
u64 end_of_file;
- *alloc_required = 0;
-
if (!len)
return 0;
if (gfs2_is_stuffed(ip)) {
if (offset + len >
sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
- *alloc_required = 1;
+ return 1;
return 0;
}
- *alloc_required = 1;
shift = sdp->sd_sb.sb_bsize_shift;
BUG_ON(gfs2_is_dir(ip));
end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift;
lblock = offset >> shift;
lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
if (lblock_stop > end_of_file)
- return 0;
+ return 1;
size = (lblock_stop - lblock) << shift;
do {
@@ -1284,12 +1281,11 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
bh.b_size = size;
gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
if (!buffer_mapped(&bh))
- return 0;
+ return 1;
size -= bh.b_size;
lblock += (bh.b_size >> ip->i_inode.i_blkbits);
} while(size > 0);
- *alloc_required = 0;
return 0;
}
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index c983177e05a..a20a5213135 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -52,6 +52,6 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
int gfs2_truncatei_resume(struct gfs2_inode *ip);
int gfs2_file_dealloc(struct gfs2_inode *ip);
int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
- unsigned int len, int *alloc_required);
+ unsigned int len);
#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8295c5b5d4a..b9dd88a78dd 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
unsigned totlen = be16_to_cpu(dent->de_rec_len);
if (gfs2_dirent_sentinel(dent))
- actual = GFS2_DIRENT_SIZE(0);
+ actual = 0;
if (totlen - actual >= required)
return 1;
return 0;
@@ -955,7 +955,12 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
/* Change the pointers.
Don't bother distinguishing stuffed from non-stuffed.
This code is complicated enough already. */
- lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL);
+ lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS);
+ if (!lp) {
+ error = -ENOMEM;
+ goto fail_brelse;
+ }
+
/* Change the pointers */
for (x = 0; x < half_len; x++)
lp[x] = cpu_to_be64(bn);
@@ -1063,7 +1068,9 @@ static int dir_double_exhash(struct gfs2_inode *dip)
/* Allocate both the "from" and "to" buffers in one big chunk */
- buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL);
+ buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS);
+ if (!buf)
+ return -ENOMEM;
for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) {
error = gfs2_dir_read_data(dip, (char *)buf,
@@ -1231,6 +1238,25 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
return 0;
}
+static void *gfs2_alloc_sort_buffer(unsigned size)
+{
+ void *ptr = NULL;
+
+ if (size < KMALLOC_MAX_SIZE)
+ ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
+ if (!ptr)
+ ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL);
+ return ptr;
+}
+
+static void gfs2_free_sort_buffer(void *ptr)
+{
+ if (is_vmalloc_addr(ptr))
+ vfree(ptr);
+ else
+ kfree(ptr);
+}
+
static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
filldir_t filldir, int *copied, unsigned *depth,
u64 leaf_no)
@@ -1271,7 +1297,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
* 99 is the maximum number of entries that can fit in a single
* leaf block.
*/
- larr = vmalloc((leaves + entries + 99) * sizeof(void *));
+ larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
if (!larr)
goto out;
darr = (const struct gfs2_dirent **)(larr + leaves);
@@ -1282,7 +1308,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
do {
error = get_leaf(ip, lfn, &bh);
if (error)
- goto out_kfree;
+ goto out_free;
lf = (struct gfs2_leaf *)bh->b_data;
lfn = be64_to_cpu(lf->lf_next);
if (lf->lf_entries) {
@@ -1291,7 +1317,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
gfs2_dirent_gather, NULL, &g);
error = PTR_ERR(dent);
if (IS_ERR(dent))
- goto out_kfree;
+ goto out_free;
if (entries2 != g.offset) {
fs_warn(sdp, "Number of entries corrupt in dir "
"leaf %llu, entries2 (%u) != "
@@ -1300,7 +1326,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
entries2, g.offset);
error = -EIO;
- goto out_kfree;
+ goto out_free;
}
error = 0;
larr[leaf++] = bh;
@@ -1312,10 +1338,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
BUG_ON(entries2 != entries);
error = do_filldir_main(ip, offset, opaque, filldir, darr,
entries, copied);
-out_kfree:
+out_free:
for(i = 0; i < leaf; i++)
brelse(larr[i]);
- vfree(larr);
+ gfs2_free_sort_buffer(larr);
out:
return error;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ed9a94f0ef1..4edd662c823 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -351,7 +351,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long last_index;
u64 pos = page->index << PAGE_CACHE_SHIFT;
unsigned int data_blocks, ind_blocks, rblocks;
- int alloc_required = 0;
struct gfs2_holder gh;
struct gfs2_alloc *al;
int ret;
@@ -364,8 +363,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
- ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
- if (ret || !alloc_required)
+ if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE))
goto out_unlock;
ret = -ENOMEM;
al = gfs2_alloc_get(ip);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddcdbf49353..9adf8f924e0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -328,6 +328,30 @@ static void gfs2_holder_wake(struct gfs2_holder *gh)
}
/**
+ * do_error - Something unexpected has happened during a lock request
+ *
+ */
+
+static inline void do_error(struct gfs2_glock *gl, const int ret)
+{
+ struct gfs2_holder *gh, *tmp;
+
+ list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
+ if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+ continue;
+ if (ret & LM_OUT_ERROR)
+ gh->gh_error = -EIO;
+ else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
+ gh->gh_error = GLR_TRYFAILED;
+ else
+ continue;
+ list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
+ gfs2_holder_wake(gh);
+ }
+}
+
+/**
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
*
@@ -375,36 +399,13 @@ restart:
}
if (gh->gh_list.prev == &gl->gl_holders)
return 1;
+ do_error(gl, 0);
break;
}
return 0;
}
/**
- * do_error - Something unexpected has happened during a lock request
- *
- */
-
-static inline void do_error(struct gfs2_glock *gl, const int ret)
-{
- struct gfs2_holder *gh, *tmp;
-
- list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
- if (test_bit(HIF_HOLDER, &gh->gh_iflags))
- continue;
- if (ret & LM_OUT_ERROR)
- gh->gh_error = -EIO;
- else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
- gh->gh_error = GLR_TRYFAILED;
- else
- continue;
- list_del_init(&gh->gh_list);
- trace_gfs2_glock_queue(gh, 0);
- gfs2_holder_wake(gh);
- }
-}
-
-/**
* find_first_waiter - find the first gh that's waiting for the glock
* @gl: the glock
*/
@@ -1062,6 +1063,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
spin_lock(&gl->gl_spin);
add_to_queue(gh);
+ if ((LM_FLAG_NOEXP & gh->gh_flags) &&
+ test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
run_queue(gl, 1);
spin_unlock(&gl->gl_spin);
@@ -1319,6 +1323,36 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
}
/**
+ * gfs2_should_freeze - Figure out if glock should be frozen
+ * @gl: The glock in question
+ *
+ * Glocks are not frozen if (a) the result of the dlm operation is
+ * an error, (b) the locking operation was an unlock operation or
+ * (c) if there is a "noexp" flagged request anywhere in the queue
+ *
+ * Returns: 1 if freezing should occur, 0 otherwise
+ */
+
+static int gfs2_should_freeze(const struct gfs2_glock *gl)
+{
+ const struct gfs2_holder *gh;
+
+ if (gl->gl_reply & ~LM_OUT_ST_MASK)
+ return 0;
+ if (gl->gl_target == LM_ST_UNLOCKED)
+ return 0;
+
+ list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+ if (test_bit(HIF_HOLDER, &gh->gh_iflags))
+ continue;
+ if (LM_FLAG_NOEXP & gh->gh_flags)
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
* gfs2_glock_complete - Callback used by locking
* @gl: Pointer to the glock
* @ret: The return value from the dlm
@@ -1328,18 +1362,17 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+
gl->gl_reply = ret;
+
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
- struct gfs2_holder *gh;
spin_lock(&gl->gl_spin);
- gh = find_first_waiter(gl);
- if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) &&
- (gl->gl_target != LM_ST_UNLOCKED)) ||
- ((ret & ~LM_OUT_ST_MASK) != 0))
+ if (gfs2_should_freeze(gl)) {
set_bit(GLF_FROZEN, &gl->gl_flags);
- spin_unlock(&gl->gl_spin);
- if (test_bit(GLF_FROZEN, &gl->gl_flags))
+ spin_unlock(&gl->gl_spin);
return;
+ }
+ spin_unlock(&gl->gl_spin);
}
set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
gfs2_glock_hold(gl);
@@ -1348,7 +1381,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
}
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_glock *gl;
int may_demote;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b5d7363b22d..8fcbce48a12 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -460,6 +460,7 @@ enum {
SDF_NOBARRIERS = 3,
SDF_NORECOVERY = 4,
SDF_DEMOTE = 5,
+ SDF_NOJOURNALID = 6,
};
#define GFS2_FSNAME_LEN 256
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b5612cbb62a..f03afd9c44b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
{
struct inode *inode;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error;
inode = gfs2_iget(sb, no_addr);
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
+ io_gl = NULL;
if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
goto gfs2_nfsbypass;
@@ -228,7 +229,8 @@ gfs2_nfsbypass:
fail_glock:
gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
- gfs2_glock_put(io_gl);
+ if (io_gl)
+ gfs2_glock_put(io_gl);
fail_put:
if (inode->i_state & I_NEW)
ip->i_gl->gl_object = NULL;
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
{
struct gfs2_sbd *sdp;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error;
struct gfs2_holder gh;
struct inode *inode;
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
+ io_gl = NULL;
inode->i_mode = DT2IF(DT_UNKNOWN);
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
fail_glock:
gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
- gfs2_glock_put(io_gl);
+ if (io_gl)
+ gfs2_glock_put(io_gl);
fail_put:
ip->i_gl->gl_object = NULL;
gfs2_glock_put(ip->i_gl);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3593b3a7290..45a4a36195d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -76,7 +76,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
sb->s_fs_info = sdp;
sdp->sd_vfs = sb;
-
+ set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
gfs2_tune_init(&sdp->sd_tune);
init_waitqueue_head(&sdp->sd_glock_wait);
@@ -1050,7 +1050,8 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
ret = match_int(&tmp[0], &option);
if (ret || option < 0)
goto hostdata_error;
- ls->ls_jid = option;
+ if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags))
+ ls->ls_jid = option;
break;
case Opt_id:
/* Obsolete, but left for backward compat purposes */
@@ -1102,6 +1103,24 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
lm->lm_unmount(sdp);
}
+static int gfs2_journalid_wait(void *word)
+{
+ if (signal_pending(current))
+ return -EINTR;
+ schedule();
+ return 0;
+}
+
+static int wait_on_journal(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_args.ar_spectator)
+ return 0;
+ if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+ return 0;
+
+ return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE);
+}
+
void gfs2_online_uevent(struct gfs2_sbd *sdp)
{
struct super_block *sb = sdp->sd_vfs;
@@ -1194,6 +1213,10 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (error)
goto fail_locking;
+ error = wait_on_journal(sdp);
+ if (error)
+ goto fail_sb;
+
error = init_inodes(sdp, DO);
if (error)
goto fail_sb;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 49667d68769..8bb643cb265 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(qd_lru_lock);
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_quota_data *qd;
struct gfs2_sbd *sdp;
@@ -694,10 +694,8 @@ get_a_page:
if (!buffer_mapped(bh))
goto unlock_out;
/* If it's a newly allocated disk block for quota, zero it */
- if (buffer_new(bh)) {
- memset(bh->b_data, 0, bh->b_size);
- set_buffer_uptodate(bh);
- }
+ if (buffer_new(bh))
+ zero_user(page, pos - blocksize, bh->b_size);
}
if (PageUptodate(page))
@@ -723,7 +721,7 @@ get_a_page:
/* If quota straddles page boundary, we need to update the rest of the
* quota at the beginning of the next page */
- if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */
+ if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
ptr = ptr + nbytes;
nbytes = sizeof(struct gfs2_quota) - nbytes;
offset = 0;
@@ -789,15 +787,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
goto out;
for (x = 0; x < num_qd; x++) {
- int alloc_required;
-
offset = qd2offset(qda[x]);
- error = gfs2_write_alloc_required(ip, offset,
- sizeof(struct gfs2_quota),
- &alloc_required);
- if (error)
- goto out_gunlock;
- if (alloc_required)
+ if (gfs2_write_alloc_required(ip, offset,
+ sizeof(struct gfs2_quota)))
nalloc++;
}
@@ -1586,10 +1578,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
goto out_i;
offset = qd2offset(qd);
- error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota),
- &alloc_required);
- if (error)
- goto out_i;
+ alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
if (alloc_required) {
al = gfs2_alloc_get(ip);
if (al == NULL)
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd1..e7d236ca48b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
extern const struct quotactl_ops gfs2_quotactl_ops;
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4d1aad38f1b..4140811a921 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -342,8 +342,6 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
- int ar;
- int error;
if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) ||
(ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) {
@@ -352,13 +350,12 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
}
jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift;
- error = gfs2_write_alloc_required(ip, 0, ip->i_disksize, &ar);
- if (!error && ar) {
+ if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) {
gfs2_consist_inode(ip);
- error = -EIO;
+ return -EIO;
}
- return error;
+ return 0;
}
/**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 37f5393e68e..d019d0d55e0 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -325,6 +325,30 @@ static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
return sprintf(buf, "%d\n", ls->ls_first);
}
+static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ unsigned first;
+ int rv;
+
+ rv = sscanf(buf, "%u", &first);
+ if (rv != 1 || first > 1)
+ return -EINVAL;
+ spin_lock(&sdp->sd_jindex_spin);
+ rv = -EBUSY;
+ if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
+ goto out;
+ rv = -EINVAL;
+ if (sdp->sd_args.ar_spectator)
+ goto out;
+ if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+ goto out;
+ sdp->sd_lockstruct.ls_first = first;
+ rv = 0;
+out:
+ spin_unlock(&sdp->sd_jindex_spin);
+ return rv ? rv : len;
+}
+
static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -377,14 +401,41 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf)
return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid);
}
+static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ unsigned jid;
+ int rv;
+
+ rv = sscanf(buf, "%u", &jid);
+ if (rv != 1)
+ return -EINVAL;
+
+ spin_lock(&sdp->sd_jindex_spin);
+ rv = -EINVAL;
+ if (sdp->sd_args.ar_spectator)
+ goto out;
+ if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
+ goto out;
+ rv = -EBUSY;
+ if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0)
+ goto out;
+ sdp->sd_lockstruct.ls_jid = jid;
+ smp_mb__after_clear_bit();
+ wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
+ rv = 0;
+out:
+ spin_unlock(&sdp->sd_jindex_spin);
+ return rv ? rv : len;
+}
+
#define GDLM_ATTR(_name,_mode,_show,_store) \
static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
GDLM_ATTR(block, 0644, block_show, block_store);
GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
-GDLM_ATTR(jid, 0444, jid_show, NULL);
-GDLM_ATTR(first, 0444, lkfirst_show, NULL);
+GDLM_ATTR(jid, 0644, jid_show, jid_store);
+GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
GDLM_ATTR(first_done, 0444, first_done_show, NULL);
GDLM_ATTR(recover, 0600, NULL, recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
@@ -564,7 +615,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
- if (!sdp->sd_args.ar_spectator)
+ if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
if (gfs2_uuid_valid(uuid))
add_uevent_var(env, "UUID=%pUB", uuid);
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d6..722860b323a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
* This function is passed the number of inodes to scan, and it returns the
* total number of remaining possibly-reclaimable inodes.
*/
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff593276..036880895bf 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct page *new_page;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
- struct jbd2_buffer_trigger_type *triggers;
journal_t *journal = transaction->t_journal;
/*
@@ -328,21 +327,21 @@ repeat:
done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data);
new_offset = offset_in_page(jh_in->b_frozen_data);
- triggers = jh_in->b_frozen_triggers;
} else {
new_page = jh2bh(jh_in)->b_page;
new_offset = offset_in_page(jh2bh(jh_in)->b_data);
- triggers = jh_in->b_triggers;
}
mapped_data = kmap_atomic(new_page, KM_USER0);
/*
- * Fire any commit trigger. Do this before checking for escaping,
- * as the trigger may modify the magic offset. If a copy-out
- * happens afterwards, it will have the correct data in the buffer.
+ * Fire data frozen trigger if data already wasn't frozen. Do this
+ * before checking for escaping, as the trigger may modify the magic
+ * offset. If a copy-out happens afterwards, it will have the correct
+ * data in the buffer.
*/
- jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
- triggers);
+ if (!done_copy_out)
+ jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+ jh_in->b_triggers);
/*
* Check for escaping
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620a..b8e0806681b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
page = jh2bh(jh)->b_page;
offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
source = kmap_atomic(page, KM_USER0);
+ /* Fire data frozen trigger just before we copy the data */
+ jbd2_buffer_frozen_trigger(jh, source + offset,
+ jh->b_triggers);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source, KM_USER0);
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
jh->b_triggers = type;
}
-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
struct jbd2_buffer_trigger_type *triggers)
{
struct buffer_head *bh = jh2bh(jh);
- if (!triggers || !triggers->t_commit)
+ if (!triggers || !triggers->t_frozen)
return;
- triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+ triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
}
void jbd2_buffer_abort_trigger(struct journal_head *jh,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index a2d58c96f1b..d258e261bdc 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -626,7 +626,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
{
- /* success of check_xattr_ref_inode() means taht inode (ic) dose not have
+ /* success of check_xattr_ref_inode() means that inode (ic) dose not have
* duplicate name/value pairs. If duplicate name/value pair would be found,
* one will be removed.
*/
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a..e28f21b9534 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
* What the mbcache registers as to get shrunk dynamically.
*/
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
static struct shrinker mb_cache_shrinker = {
.shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
* This function is called by the kernel memory management when memory
* gets low.
*
+ * @shrink: (ignored)
* @nr_to_scan: Number of objects to scan
* @gfp_mask: (ignored)
*
* Returns the number of objects which are present in the cache.
*/
static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(free_list);
struct list_head *l, *ltmp;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f..d25b5257b7a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
#endif /* CONFIG_NFS_V4_1 */
}
+static int nfs4_server_common_setup(struct nfs_server *server,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_fattr *fattr;
+ int error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL)
+ return -ENOMEM;
+
+ /* We must ensure the session is initialised first */
+ error = nfs4_init_session(server);
+ if (error < 0)
+ goto out;
+
+ /* Probe the root fh to retrieve its FSID and filehandle */
+ error = nfs4_get_rootfh(server, mntfh);
+ if (error < 0)
+ goto out;
+
+ dprintk("Server FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+ dprintk("Mount FH: %d\n", mntfh->size);
+
+ nfs4_session_set_rwsize(server);
+
+ error = nfs_probe_fsinfo(server, mntfh, fattr);
+ if (error < 0)
+ goto out;
+
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+out:
+ nfs_free_fattr(fattr);
+ return error;
+}
+
/*
* Create a version 4 volume record
*/
@@ -1346,7 +1395,6 @@ error:
struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
struct nfs_fh *mntfh)
{
- struct nfs_fattr *fattr;
struct nfs_server *server;
int error;
@@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
/* set up the general RPC client */
error = nfs4_init_server(server, data);
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- error = nfs4_init_session(server);
- if (error < 0)
- goto error;
-
- /* Probe the root fh to retrieve its FSID */
- error = nfs4_get_rootfh(server, mntfh);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- dprintk("Server FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
- dprintk("Mount FH: %d\n", mntfh->size);
-
- nfs4_session_set_rwsize(server);
-
- error = nfs_probe_fsinfo(server, mntfh, fattr);
- if (error < 0)
- goto error;
-
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
dprintk("<-- nfs4_create_server() = %p\n", server);
- nfs_free_fattr(fattr);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_server() = error %d\n", error);
return ERR_PTR(error);
@@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
- struct nfs_fattr *fattr;
int error;
dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
parent_server = NFS_SB(data->sb);
parent_client = parent_server->nfs_client;
@@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- /* Probe the root fh to retrieve its FSID and filehandle */
- error = nfs4_get_rootfh(server, mntfh);
- if (error < 0)
- goto error;
-
- /* probe the filesystem info for this server filesystem */
- error = nfs_probe_fsinfo(server, mntfh, fattr);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- dprintk("Referral FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
-
- nfs_free_fattr(fattr);
dprintk("<-- nfs_create_referral_server() = %p\n", server);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91..e60416d3f81 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(head);
struct nfs_inode *nfsi;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 36a5e74f51b..f036153d9f5 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -27,6 +27,7 @@
#include <linux/pagemap.h>
#include <linux/aio.h>
#include <linux/gfp.h>
+#include <linux/swap.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -493,11 +494,19 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
*/
static int nfs_release_page(struct page *page, gfp_t gfp)
{
+ struct address_space *mapping = page->mapping;
+
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
/* Only do I/O if gfp is a superset of GFP_KERNEL */
- if ((gfp & GFP_KERNEL) == GFP_KERNEL)
- nfs_wb_page(page->mapping->host, page);
+ if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) {
+ int how = FLUSH_SYNC;
+
+ /* Don't let kswapd deadlock waiting for OOM RPC calls */
+ if (current_is_kswapd())
+ how = 0;
+ nfs_commit_inode(mapping->host, how);
+ }
/* If PagePrivate() is set, then the page is not freeable */
if (PagePrivate(page))
return 0;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273..a70e446e160 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
goto out;
}
- if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
+ if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
|| !S_ISDIR(fsinfo.fattr->mode)) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot encountered non-directory\n");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386..e70f44b9b3f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
/* dir.c */
-extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+extern int nfs_access_cache_shrinker(struct shrinker *shrink,
+ int nr_to_scan, gfp_t gfp_mask);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6bdef28efa3..65c8dae4b26 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
*p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
}
else if (iap->ia_valid & ATTR_ATIME) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 6bd19d843af..df101d9f546 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -105,7 +105,7 @@ static char nfs_root_name[256] __initdata = "";
static __be32 servaddr __initdata = 0;
/* Name of directory to mount */
-static char nfs_export_path[NFS_MAXPATHLEN] __initdata = { 0, };
+static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, };
/* NFS-related data */
static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 04214fc5c30..f9df16de4a5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
nfs_show_mountd_netid(m, nfss, showdefaults);
}
+#ifdef CONFIG_NFS_V4
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+ struct nfs_client *clp = nfss->nfs_client;
+
+ seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
+ seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
+}
+#else
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+}
+#endif
+
/*
* Describe the mount options in force on this server representation
*/
@@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
if (version != 4)
nfs_show_mountd_options(m, nfss, showdefaults);
+ else
+ nfs_show_nfsv4_options(m, nfss, showdefaults);
-#ifdef CONFIG_NFS_V4
- if (clp->rpc_ops->version == 4)
- seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
-#endif
if (nfss->options & NFS_OPTION_FSCACHE)
seq_printf(m, ",fsc");
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 91679e2631e..9f81bdd91c5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -222,7 +222,7 @@ static void nfs_end_page_writeback(struct page *page)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
-static struct nfs_page *nfs_find_and_lock_request(struct page *page)
+static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
{
struct inode *inode = page->mapping->host;
struct nfs_page *req;
@@ -241,7 +241,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page)
* request as dirty (in which case we don't care).
*/
spin_unlock(&inode->i_lock);
- ret = nfs_wait_on_request(req);
+ if (!nonblock)
+ ret = nfs_wait_on_request(req);
+ else
+ ret = -EAGAIN;
nfs_release_request(req);
if (ret != 0)
return ERR_PTR(ret);
@@ -256,12 +259,12 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page)
* May return an error if the user signalled nfs_wait_on_request().
*/
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
- struct page *page)
+ struct page *page, bool nonblock)
{
struct nfs_page *req;
int ret = 0;
- req = nfs_find_and_lock_request(page);
+ req = nfs_find_and_lock_request(page, nonblock);
if (!req)
goto out;
ret = PTR_ERR(req);
@@ -283,12 +286,20 @@ out:
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
struct inode *inode = page->mapping->host;
+ int ret;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
nfs_pageio_cond_complete(pgio, page->index);
- return nfs_page_async_flush(pgio, page);
+ ret = nfs_page_async_flush(pgio, page,
+ wbc->sync_mode == WB_SYNC_NONE ||
+ wbc->nonblocking != 0);
+ if (ret == -EAGAIN) {
+ redirty_page_for_writepage(wbc, page);
+ ret = 0;
+ }
+ return ret;
}
/*
@@ -1379,7 +1390,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
.rpc_release = nfs_commit_release,
};
-static int nfs_commit_inode(struct inode *inode, int how)
+int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
int may_wait = how & FLUSH_SYNC;
@@ -1443,11 +1454,6 @@ out_mark_dirty:
return ret;
}
#else
-static int nfs_commit_inode(struct inode *inode, int how)
-{
- return 0;
-}
-
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
return 0;
@@ -1546,7 +1552,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
nfs_fscache_release_page(page, GFP_KERNEL);
- req = nfs_find_and_lock_request(page);
+ req = nfs_find_and_lock_request(page, false);
ret = PTR_ERR(req);
if (IS_ERR(req))
goto out;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc1..96337a4fbbd 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
dump_stack();
goto bail;
}
-
- past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
- mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
- (unsigned long long)past_eof);
-
- if (create && (iblock >= past_eof))
- set_buffer_new(bh_result);
}
+ past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
+ mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
+ (unsigned long long)past_eof);
+ if (create && (iblock >= past_eof))
+ set_buffer_new(bh_result);
+
bail:
if (err < 0)
err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle,
return ret;
}
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
- struct page *page,
- unsigned from,
- unsigned to)
-{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- handle_t *handle;
- int ret = 0;
-
- handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = -ENOMEM;
- mlog_errno(ret);
- goto out;
- }
-
- if (ocfs2_should_order_data(inode)) {
- ret = ocfs2_jbd2_file_inode(handle, inode);
- if (ret < 0)
- mlog_errno(ret);
- }
-out:
- if (ret) {
- if (!IS_ERR(handle))
- ocfs2_commit_trans(osb, handle);
- handle = ERR_PTR(ret);
- }
- return handle;
-}
-
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
{
sector_t status;
@@ -609,7 +578,9 @@ bail:
static void ocfs2_dio_end_io(struct kiocb *iocb,
loff_t offset,
ssize_t bytes,
- void *private)
+ void *private,
+ int ret,
+ bool is_async)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
int level;
@@ -623,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
if (!level)
up_read(&inode->i_alloc_sem);
ocfs2_rw_unlock(inode, level);
+
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
/*
@@ -1131,23 +1105,37 @@ out:
*/
static int ocfs2_grab_pages_for_write(struct address_space *mapping,
struct ocfs2_write_ctxt *wc,
- u32 cpos, loff_t user_pos, int new,
+ u32 cpos, loff_t user_pos,
+ unsigned user_len, int new,
struct page *mmap_page)
{
int ret = 0, i;
- unsigned long start, target_index, index;
+ unsigned long start, target_index, end_index, index;
struct inode *inode = mapping->host;
+ loff_t last_byte;
target_index = user_pos >> PAGE_CACHE_SHIFT;
/*
* Figure out how many pages we'll be manipulating here. For
* non allocating write, we just change the one
- * page. Otherwise, we'll need a whole clusters worth.
+ * page. Otherwise, we'll need a whole clusters worth. If we're
+ * writing past i_size, we only need enough pages to cover the
+ * last page of the write.
*/
if (new) {
wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
+ /*
+ * We need the index *past* the last page we could possibly
+ * touch. This is the page past the end of the write or
+ * i_size, whichever is greater.
+ */
+ last_byte = max(user_pos + user_len, i_size_read(inode));
+ BUG_ON(last_byte < 1);
+ end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
+ if ((start + wc->w_num_pages) > end_index)
+ wc->w_num_pages = end_index - start;
} else {
wc->w_num_pages = 1;
start = target_index;
@@ -1620,21 +1608,20 @@ out:
* write path can treat it as an non-allocating write, which has no
* special case code for sparse/nonsparse files.
*/
-static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
- unsigned len,
+static int ocfs2_expand_nonsparse_inode(struct inode *inode,
+ struct buffer_head *di_bh,
+ loff_t pos, unsigned len,
struct ocfs2_write_ctxt *wc)
{
int ret;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
loff_t newsize = pos + len;
- if (ocfs2_sparse_alloc(osb))
- return 0;
+ BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
if (newsize <= i_size_read(inode))
return 0;
- ret = ocfs2_extend_no_holes(inode, newsize, pos);
+ ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
if (ret)
mlog_errno(ret);
@@ -1644,6 +1631,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
return ret;
}
+static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
+ loff_t pos)
+{
+ int ret = 0;
+
+ BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
+ if (pos > i_size_read(inode))
+ ret = ocfs2_zero_extend(inode, di_bh, pos);
+
+ return ret;
+}
+
int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
@@ -1679,7 +1678,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
}
}
- ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
+ if (ocfs2_sparse_alloc(osb))
+ ret = ocfs2_zero_tail(inode, di_bh, pos);
+ else
+ ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
+ wc);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1789,7 +1792,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
* that we can zero and flush if we error after adding the
* extent.
*/
- ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
+ ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
cluster_of_pages, mmap_page);
if (ret) {
mlog_errno(ret);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6b5a492e174..153abb5abef 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
struct dlm_ctxt *dlm = NULL;
struct dlm_ctxt *new_ctxt = NULL;
- if (strlen(domain) > O2NM_MAX_NAME_LEN) {
+ if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
ret = -ENAMETOOLONG;
mlog(ML_ERROR, "domain name length too long\n");
goto leave;
@@ -1709,6 +1709,7 @@ retry:
}
if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
+ spin_unlock(&dlm_domain_lock);
mlog(ML_ERROR,
"Requested locking protocol version is not "
"compatible with already registered domain "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4a7506a4e31..94b97fc6a88 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2808,14 +2808,8 @@ again:
mlog(0, "trying again...\n");
goto again;
}
- /* now that we are sure the MIGRATING state is there, drop
- * the unneded state which blocked threads trying to DIRTY */
- spin_lock(&res->spinlock);
- BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
- BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
- res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
- spin_unlock(&res->spinlock);
+ ret = 0;
/* did the target go down or die? */
spin_lock(&dlm->spinlock);
if (!test_bit(target, dlm->domain_map)) {
@@ -2826,9 +2820,21 @@ again:
spin_unlock(&dlm->spinlock);
/*
+ * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
+ * another try; otherwise, we are sure the MIGRATING state is there,
+ * drop the unneded state which blocked threads trying to DIRTY
+ */
+ spin_lock(&res->spinlock);
+ BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
+ res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
+ if (!ret)
+ BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
+ spin_unlock(&res->spinlock);
+
+ /*
* at this point:
*
- * o the DLM_LOCK_RES_MIGRATING flag is set
+ * o the DLM_LOCK_RES_MIGRATING flag is set if target not down
* o there are no pending asts on this lockres
* o all processes trying to reserve an ast on this
* lockres must wait for the MIGRATING flag to clear
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f8b75ce4be7..9dfaac73b36 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
int bit;
- bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
+ bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
if (bit >= O2NM_MAX_NODES || bit < 0)
dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
else
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c44..2b10b36d157 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
return status;
}
+/*
+ * While a write will already be ordering the data, a truncate will not.
+ * Thus, we need to explicitly order the zeroed pages.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ handle_t *handle = NULL;
+ int ret = 0;
+
+ if (!ocfs2_should_order_data(inode))
+ goto out;
+
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_jbd2_file_inode(handle, inode);
+ if (ret < 0)
+ mlog_errno(ret);
+
+out:
+ if (ret) {
+ if (!IS_ERR(handle))
+ ocfs2_commit_trans(osb, handle);
+ handle = ERR_PTR(ret);
+ }
+ return handle;
+}
+
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->write_begin() and ->write_end(). */
-static int ocfs2_write_zero_page(struct inode *inode,
- u64 size)
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
+ u64 abs_to)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
- unsigned long index;
- unsigned int offset;
+ unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
handle_t *handle = NULL;
- int ret;
+ int ret = 0;
+ unsigned zero_from, zero_to, block_start, block_end;
- offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
- /* ugh. in prepare/commit_write, if from==to==start of block, we
- ** skip the prepare. make sure we never send an offset for the start
- ** of a block
- */
- if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
- offset++;
- }
- index = size >> PAGE_CACHE_SHIFT;
+ BUG_ON(abs_from >= abs_to);
+ BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
+ BUG_ON(abs_from & (inode->i_blkbits - 1));
page = grab_cache_page(mapping, index);
if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
goto out;
}
- ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
- if (ret < 0) {
- mlog_errno(ret);
- goto out_unlock;
- }
+ /* Get the offsets within the page that we want to zero */
+ zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
+ zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+ if (!zero_to)
+ zero_to = PAGE_CACHE_SIZE;
- if (ocfs2_should_order_data(inode)) {
- handle = ocfs2_start_walk_page_trans(inode, page, offset,
- offset);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ mlog(0,
+ "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
+ (unsigned long long)abs_from, (unsigned long long)abs_to,
+ index, zero_from, zero_to);
+
+ /* We know that zero_from is block aligned */
+ for (block_start = zero_from; block_start < zero_to;
+ block_start = block_end) {
+ block_end = block_start + (1 << inode->i_blkbits);
+
+ /*
+ * block_start is block-aligned. Bump it by one to
+ * force ocfs2_{prepare,commit}_write() to zero the
+ * whole block.
+ */
+ ret = ocfs2_prepare_write_nolock(inode, page,
+ block_start + 1,
+ block_start + 1);
+ if (ret < 0) {
+ mlog_errno(ret);
goto out_unlock;
}
- }
- /* must not update i_size! */
- ret = block_commit_write(page, offset, offset);
- if (ret < 0)
- mlog_errno(ret);
- else
- ret = 0;
+ if (!handle) {
+ handle = ocfs2_zero_start_ordered_transaction(inode);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ handle = NULL;
+ break;
+ }
+ }
+
+ /* must not update i_size! */
+ ret = block_commit_write(page, block_start + 1,
+ block_start + 1);
+ if (ret < 0)
+ mlog_errno(ret);
+ else
+ ret = 0;
+ }
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+
out_unlock:
unlock_page(page);
page_cache_release(page);
@@ -786,22 +838,114 @@ out:
return ret;
}
-static int ocfs2_zero_extend(struct inode *inode,
- u64 zero_to_size)
+/*
+ * Find the next range to zero. We do this in terms of bytes because
+ * that's what ocfs2_zero_extend() wants, and it is dealing with the
+ * pagecache. We may return multiple extents.
+ *
+ * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
+ * needs to be zeroed. range_start and range_end return the next zeroing
+ * range. A subsequent call should pass the previous range_end as its
+ * zero_start. If range_end is 0, there's nothing to do.
+ *
+ * Unwritten extents are skipped over. Refcounted extents are CoWd.
+ */
+static int ocfs2_zero_extend_get_range(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 zero_start, u64 zero_end,
+ u64 *range_start, u64 *range_end)
{
- int ret = 0;
- u64 start_off;
- struct super_block *sb = inode->i_sb;
+ int rc = 0, needs_cow = 0;
+ u32 p_cpos, zero_clusters = 0;
+ u32 zero_cpos =
+ zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+ u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
- start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
- while (start_off < zero_to_size) {
- ret = ocfs2_write_zero_page(inode, start_off);
- if (ret < 0) {
- mlog_errno(ret);
+ while (zero_cpos < last_cpos) {
+ rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
+ &num_clusters, &ext_flags);
+ if (rc) {
+ mlog_errno(rc);
+ goto out;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ zero_clusters = num_clusters;
+ if (ext_flags & OCFS2_EXT_REFCOUNTED)
+ needs_cow = 1;
+ break;
+ }
+
+ zero_cpos += num_clusters;
+ }
+ if (!zero_clusters) {
+ *range_end = 0;
+ goto out;
+ }
+
+ while ((zero_cpos + zero_clusters) < last_cpos) {
+ rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
+ &p_cpos, &num_clusters,
+ &ext_flags);
+ if (rc) {
+ mlog_errno(rc);
goto out;
}
- start_off += sb->s_blocksize;
+ if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
+ break;
+ if (ext_flags & OCFS2_EXT_REFCOUNTED)
+ needs_cow = 1;
+ zero_clusters += num_clusters;
+ }
+ if ((zero_cpos + zero_clusters) > last_cpos)
+ zero_clusters = last_cpos - zero_cpos;
+
+ if (needs_cow) {
+ rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
+ UINT_MAX);
+ if (rc) {
+ mlog_errno(rc);
+ goto out;
+ }
+ }
+
+ *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
+ *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
+ zero_cpos + zero_clusters);
+
+out:
+ return rc;
+}
+
+/*
+ * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
+ * has made sure that the entire range needs zeroing.
+ */
+static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
+ u64 range_end)
+{
+ int rc = 0;
+ u64 next_pos;
+ u64 zero_pos = range_start;
+
+ mlog(0, "range_start = %llu, range_end = %llu\n",
+ (unsigned long long)range_start,
+ (unsigned long long)range_end);
+ BUG_ON(range_start >= range_end);
+
+ while (zero_pos < range_end) {
+ next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+ if (next_pos > range_end)
+ next_pos = range_end;
+ rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+ if (rc < 0) {
+ mlog_errno(rc);
+ break;
+ }
+ zero_pos = next_pos;
/*
* Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
cond_resched();
}
-out:
+ return rc;
+}
+
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+ loff_t zero_to_size)
+{
+ int ret = 0;
+ u64 zero_start, range_start = 0, range_end = 0;
+ struct super_block *sb = inode->i_sb;
+
+ zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
+ mlog(0, "zero_start %llu for i_size %llu\n",
+ (unsigned long long)zero_start,
+ (unsigned long long)i_size_read(inode));
+ while (zero_start < zero_to_size) {
+ ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
+ zero_to_size,
+ &range_start,
+ &range_end);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
+ if (!range_end)
+ break;
+ /* Trim the ends */
+ if (range_start < zero_start)
+ range_start = zero_start;
+ if (range_end > zero_to_size)
+ range_end = zero_to_size;
+
+ ret = ocfs2_zero_extend_range(inode, range_start,
+ range_end);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
+ zero_start = range_end;
+ }
+
return ret;
}
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+ u64 new_i_size, u64 zero_to)
{
int ret;
u32 clusters_to_add;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ /*
+ * Only quota files call this without a bh, and they can't be
+ * refcounted.
+ */
+ BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
+
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
if (clusters_to_add < oi->ip_clusters)
clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
* still need to zero the area between the old i_size and the
* new i_size.
*/
- ret = ocfs2_zero_extend(inode, zero_to);
+ ret = ocfs2_zero_extend(inode, di_bh, zero_to);
if (ret < 0)
mlog_errno(ret);
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
goto out;
if (i_size_read(inode) == new_i_size)
- goto out;
+ goto out;
BUG_ON(new_i_size < i_size_read(inode));
/*
- * Fall through for converting inline data, even if the fs
- * supports sparse files.
- *
- * The check for inline data here is legal - nobody can add
- * the feature since we have i_mutex. We must check it again
- * after acquiring ip_alloc_sem though, as paths like mmap
- * might have raced us to converting the inode to extents.
- */
- if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
- && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- goto out_update_size;
-
- /*
* The alloc sem blocks people in read/write from reading our
* allocation until we're done changing it. We depend on
* i_mutex to block other extend/truncate calls while we're
- * here.
+ * here. We even have to hold it for sparse files because there
+ * might be some tail zeroing.
*/
down_write(&oi->ip_alloc_sem);
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
if (ret) {
up_write(&oi->ip_alloc_sem);
-
mlog_errno(ret);
goto out;
}
}
- if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
+ else
+ ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
+ new_i_size);
up_write(&oi->ip_alloc_sem);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index d66cf4f7c70..97bf761c9e7 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size);
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
- u64 zero_to);
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+ u64 new_i_size, u64 zero_to);
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+ loff_t zero_to);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf1641..625de9d7088 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
}
-static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
* Quota blocks have their own trigger because the struct ocfs2_block_check
* offset depends on the blocksize.
*/
-static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
* Directory blocks also have their own trigger because the
* struct ocfs2_block_check offset depends on the blocksize.
*/
-static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh,
void *data, size_t size)
{
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
static struct ocfs2_triggers di_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
static struct ocfs2_triggers eb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
static struct ocfs2_triggers rb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
static struct ocfs2_triggers gd_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
static struct ocfs2_triggers db_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_db_commit_trigger,
+ .t_frozen = ocfs2_db_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers xb_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
static struct ocfs2_triggers dq_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_dq_commit_trigger,
+ .t_frozen = ocfs2_dq_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
};
static struct ocfs2_triggers dr_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
static struct ocfs2_triggers dl_triggers = {
.ot_triggers = {
- .t_commit = ocfs2_commit_trigger,
+ .t_frozen = ocfs2_frozen_trigger,
.t_abort = ocfs2_abort_trigger,
},
.ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
mutex_lock(&os->os_lock);
ocfs2_queue_orphan_scan(osb);
if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
- schedule_delayed_work(&os->os_orphan_scan_work,
+ queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
ocfs2_orphan_scan_timeout());
mutex_unlock(&os->os_lock);
}
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
else {
atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
- schedule_delayed_work(&os->os_orphan_scan_work,
- ocfs2_orphan_scan_timeout());
+ queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+ ocfs2_orphan_scan_timeout());
}
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 3d7419682dc..ec6adbf8f55 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
{
unsigned int la_mb;
unsigned int gd_mb;
+ unsigned int la_max_mb;
unsigned int megs_per_slot;
struct super_block *sb = osb->sb;
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
if (megs_per_slot < la_mb)
la_mb = megs_per_slot;
+ /* We can't store more bits than we can in a block. */
+ la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
+ ocfs2_local_alloc_size(sb) * 8);
+ if (la_mb > la_max_mb)
+ la_mb = la_max_mb;
+
return la_mb;
}
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bb35fe0051..4607923eb24 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
* locking allocators ranks above a transaction start
*/
WARN_ON(journal_current_handle());
- status = ocfs2_extend_no_holes(gqinode,
+ status = ocfs2_extend_no_holes(gqinode, NULL,
gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
gqinode->i_size);
if (status < 0)
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8bd70d4d184..dc78764ccc4 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
u64 p_blkno;
/* We are protected by dqio_sem so no locking needed */
- status = ocfs2_extend_no_holes(lqinode,
+ status = ocfs2_extend_no_holes(lqinode, NULL,
lqinode->i_size + 2 * sb->s_blocksize,
lqinode->i_size);
if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
return ocfs2_local_quota_add_chunk(sb, type, offset);
/* We are protected by dqio_sem so no locking needed */
- status = ocfs2_extend_no_holes(lqinode,
+ status = ocfs2_extend_no_holes(lqinode, NULL,
lqinode->i_size + sb->s_blocksize,
lqinode->i_size);
if (status < 0) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4793f36f651..3ac5aa733e9 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
+ /*
+ * We only duplicate pages until we reach the page contains i_size - 1.
+ * So trim 'end' to i_size.
+ */
+ if (end > i_size_read(context->inode))
+ end = i_size_read(context->inode);
while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT;
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
struct inode *inode = old_dentry->d_inode;
struct buffer_head *new_bh = NULL;
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ ret = -EINVAL;
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = filemap_fdatawrite(inode->i_mapping);
if (ret) {
mlog_errno(ret);
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 40650021fc2..d8b6e4259b8 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -26,7 +26,6 @@
#include <linux/fs.h>
#include <linux/types.h>
-#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/bitops.h>
#include <linux/list.h>
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f4c2a9eb8c4..a8e6a95a353 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
le16_to_cpu(bg->bg_free_bits_count));
le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
le16_to_cpu(bg->bg_bits));
- cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno);
+ cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
le16_add_cpu(&cl->cl_next_free_rec, 1);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e97b34842cf..d03469f6180 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
struct ocfs2_xattr_value_buf *vb,
struct ocfs2_xattr_set_ctxt *ctxt)
{
- int status = 0;
+ int status = 0, credits;
handle_t *handle = ctxt->handle;
enum ocfs2_alloc_restarted why;
u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
- status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
+ while (clusters_to_add) {
+ status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ break;
+ }
- prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
- status = ocfs2_add_clusters_in_btree(handle,
- &et,
- &logical_start,
- clusters_to_add,
- 0,
- ctxt->data_ac,
- ctxt->meta_ac,
- &why);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
+ prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
+ status = ocfs2_add_clusters_in_btree(handle,
+ &et,
+ &logical_start,
+ clusters_to_add,
+ 0,
+ ctxt->data_ac,
+ ctxt->meta_ac,
+ &why);
+ if ((status < 0) && (status != -EAGAIN)) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ break;
+ }
- ocfs2_journal_dirty(handle, vb->vb_bh);
+ ocfs2_journal_dirty(handle, vb->vb_bh);
- clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
+ clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
+ prev_clusters;
- /*
- * We should have already allocated enough space before the transaction,
- * so no need to restart.
- */
- BUG_ON(why != RESTART_NONE || clusters_to_add);
-
-leave:
+ if (why != RESTART_NONE && clusters_to_add) {
+ /*
+ * We can only fail in case the alloc file doesn't give
+ * up enough clusters.
+ */
+ BUG_ON(why == RESTART_META);
+
+ mlog(0, "restarting xattr value extension for %u"
+ " clusters,.\n", clusters_to_add);
+ credits = ocfs2_calc_extend_credits(inode->i_sb,
+ &vb->vb_xv->xr_list,
+ clusters_to_add);
+ status = ocfs2_extend_trans(handle, credits);
+ if (status < 0) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ break;
+ }
+ }
+ }
return status;
}
@@ -6788,16 +6804,15 @@ out:
return ret;
}
-static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+static int ocfs2_reflink_xattr_bucket(handle_t *handle,
u64 blkno, u64 new_blkno, u32 clusters,
+ u32 *cpos, int num_buckets,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_reflink_xattr_tree_args *args)
{
int i, j, ret = 0;
struct super_block *sb = args->reflink->old_inode->i_sb;
- u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
- u32 num_buckets = clusters * bpc;
int bpb = args->old_bucket->bu_blocks;
struct ocfs2_xattr_value_buf vb = {
.vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
break;
}
- /*
- * The real bucket num in this series of blocks is stored
- * in the 1st bucket.
- */
- if (i == 0)
- num_buckets = le16_to_cpu(
- bucket_xh(args->old_bucket)->xh_num_buckets);
-
ret = ocfs2_xattr_bucket_journal_access(handle,
args->new_bucket,
OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
bucket_block(args->old_bucket, j),
sb->s_blocksize);
+ /*
+ * Record the start cpos so that we can use it to initialize
+ * our xattr tree we also set the xh_num_bucket for the new
+ * bucket.
+ */
+ if (i == 0) {
+ *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
+ xh_entries[0].xe_name_hash);
+ bucket_xh(args->new_bucket)->xh_num_buckets =
+ cpu_to_le16(num_buckets);
+ }
+
ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
}
ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+
ocfs2_xattr_bucket_relse(args->old_bucket);
ocfs2_xattr_bucket_relse(args->new_bucket);
}
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
ocfs2_xattr_bucket_relse(args->new_bucket);
return ret;
}
+
+static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+ struct inode *inode,
+ struct ocfs2_reflink_xattr_tree_args *args,
+ struct ocfs2_extent_tree *et,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac,
+ u64 blkno, u32 cpos, u32 len)
+{
+ int ret, first_inserted = 0;
+ u32 p_cluster, num_clusters, reflink_cpos = 0;
+ u64 new_blkno;
+ unsigned int num_buckets, reflink_buckets;
+ unsigned int bpc =
+ ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
+
+ ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
+ ocfs2_xattr_bucket_relse(args->old_bucket);
+
+ while (len && num_buckets) {
+ ret = ocfs2_claim_clusters(handle, data_ac,
+ 1, &p_cluster, &num_clusters);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
+ reflink_buckets = min(num_buckets, bpc * num_clusters);
+
+ ret = ocfs2_reflink_xattr_bucket(handle, blkno,
+ new_blkno, num_clusters,
+ &reflink_cpos, reflink_buckets,
+ meta_ac, data_ac, args);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * For the 1st allocated cluster, we make it use the same cpos
+ * so that the xattr tree looks the same as the original one
+ * in the most case.
+ */
+ if (!first_inserted) {
+ reflink_cpos = cpos;
+ first_inserted = 1;
+ }
+ ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
+ num_clusters, 0, meta_ac);
+ if (ret)
+ mlog_errno(ret);
+
+ mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
+ (unsigned long long)new_blkno, num_clusters, reflink_cpos);
+
+ len -= num_clusters;
+ blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
+ num_buckets -= reflink_buckets;
+ }
+out:
+ return ret;
+}
+
/*
* Create the same xattr extent record in the new inode's xattr tree.
*/
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
void *para)
{
int ret, credits = 0;
- u32 p_cluster, num_clusters;
- u64 new_blkno;
handle_t *handle;
struct ocfs2_reflink_xattr_tree_args *args =
(struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_extent_tree et;
+ mlog(0, "reflink xattr buckets %llu len %u\n",
+ (unsigned long long)blkno, len);
+
ocfs2_init_xattr_tree_extent_tree(&et,
INODE_CACHE(args->reflink->new_inode),
args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
goto out;
}
- ret = ocfs2_claim_clusters(handle, data_ac,
- len, &p_cluster, &num_clusters);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
-
- mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
- (unsigned long long)blkno, (unsigned long long)new_blkno, len);
- ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
- meta_ac, data_ac, args);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
- (unsigned long long)new_blkno, len, cpos);
- ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
- len, 0, meta_ac);
+ ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
+ meta_ac, data_ac,
+ blkno, cpos, len);
if (ret)
mlog_errno(ret);
-out_commit:
ocfs2_commit_trans(osb, handle);
out:
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 3e73de5967f..fc8497643fd 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state)
} *label;
unsigned char *data;
Sector sect;
+ sector_t labelsect;
res = 0;
blocksize = bdev_logical_block_size(bdev);
@@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state)
goto out_freeall;
/*
+ * Special case for FBA disks: label sector does not depend on
+ * blocksize.
+ */
+ if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
+ (info->cu_type == 0x3880 && info->dev_type == 0x3370))
+ labelsect = info->label_block;
+ else
+ labelsect = info->label_block * (blocksize >> 9);
+
+ /*
* Get volume label, extract name and type.
*/
- data = read_part_sector(state, info->label_block*(blocksize/512),
- &sect);
+ data = read_part_sector(state, labelsect, &sect);
if (data == NULL)
goto out_readerr;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9b58d38bc91..fff6572676a 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -176,7 +176,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
if (tracer)
tpid = task_pid_nr_ns(tracer, ns);
}
- cred = get_cred((struct cred *) __task_cred(p));
+ cred = get_task_cred(p);
seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index ce94801f48c..d9396a4fc7f 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np,
for (pp = np->properties; pp != NULL; pp = pp->next) {
p = pp->name;
+ if (strchr(p, '/'))
+ continue;
+
if (duplicate_name(de, p))
p = fixup_name(np, de, p);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 46d4b5d72bd..cb6306e6384 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
return size;
}
+static void pad_len_spaces(struct seq_file *m, int len)
+{
+ len = 25 + sizeof(void*) * 6 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c", len, ' ');
+}
+
/*
* display a single VMA to a sequenced file
*/
static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
+ struct mm_struct *mm = vma->vm_mm;
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
@@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
- len = 25 + sizeof(void *) * 6 - len;
- if (len < 1)
- len = 1;
- seq_printf(m, "%*c", len, ' ');
+ pad_len_spaces(m, len);
seq_path(m, &file->f_path, "");
+ } else if (mm) {
+ if (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack) {
+ pad_len_spaces(m, len);
+ seq_puts(m, "[stack]");
+ }
}
seq_putc(m, '\n');
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6..437d2ca2de9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
* This is called from kswapd when we think we need some
* more memory
*/
-static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
spin_lock(&dq_list_lock);
diff --git a/fs/splice.c b/fs/splice.c
index 740e6b9faf7..efdbfece993 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
{
struct file *file = sd->u.file;
- return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+ return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
+ sd->flags);
}
/**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_in)
return -ESPIPE;
if (off_out) {
- if (!out->f_op || !out->f_op->llseek ||
- out->f_op->llseek == no_llseek)
+ if (!(out->f_mode & FMODE_PWRITE))
return -EINVAL;
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_out)
return -ESPIPE;
if (off_in) {
- if (!in->f_op || !in->f_op->llseek ||
- in->f_op->llseek == no_llseek)
+ if (!(in->f_mode & FMODE_PREAD))
return -EINVAL;
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
return -EFAULT;
diff --git a/fs/super.c b/fs/super.c
index 5c35bc7a499..938119ab8dc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -374,6 +374,8 @@ void sync_supers(void)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
}
@@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
spin_unlock(&sb_lock);
@@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
}
up_write(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
spin_unlock(&sb_lock);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index f71246bebfe..a7ac78f8e67 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -28,6 +28,7 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
struct sysfs_dirent *target_sd = NULL;
struct sysfs_dirent *sd = NULL;
struct sysfs_addrm_cxt acxt;
+ enum kobj_ns_type ns_type;
int error;
BUG_ON(!name);
@@ -58,16 +59,29 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
if (!sd)
goto out_put;
- if (sysfs_ns_type(parent_sd))
+ ns_type = sysfs_ns_type(parent_sd);
+ if (ns_type)
sd->s_ns = target->ktype->namespace(target);
sd->s_symlink.target_sd = target_sd;
target_sd = NULL; /* reference is now owned by the symlink */
sysfs_addrm_start(&acxt, parent_sd);
- if (warn)
- error = sysfs_add_one(&acxt, sd);
- else
- error = __sysfs_add_one(&acxt, sd);
+ /* Symlinks must be between directories with the same ns_type */
+ if (!ns_type ||
+ (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
+ if (warn)
+ error = sysfs_add_one(&acxt, sd);
+ else
+ error = __sysfs_add_one(&acxt, sd);
+ } else {
+ error = -EINVAL;
+ WARN(1, KERN_WARNING
+ "sysfs: symlink across ns_types %s/%s -> %s/%s\n",
+ parent_sd->s_name,
+ sd->s_name,
+ sd->s_symlink.target_sd->s_parent->s_name,
+ sd->s_symlink.target_sd->s_name);
+ }
sysfs_addrm_finish(&acxt);
if (error)
@@ -122,7 +136,7 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
{
const void *ns = NULL;
spin_lock(&sysfs_assoc_lock);
- if (targ->sd)
+ if (targ->sd && sysfs_ns_type(kobj->sd))
ns = targ->sd->s_ns;
spin_unlock(&sysfs_assoc_lock);
sysfs_hash_and_remove(kobj->sd, ns, name);
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index bbd69bdb0fa..fcc498ec9b3 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -25,6 +25,7 @@
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
+#include <linux/writeback.h>
#include "sysv.h"
/* We don't trust the value of
@@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
struct inode *inode;
sysv_ino_t ino;
unsigned count;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE
+ };
inode = new_inode(sb);
if (!inode)
@@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
insert_inode_hash(inode);
mark_inode_dirty(inode);
- sysv_write_inode(inode, 0); /* ensure inode not allocated again */
+ sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */
mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
/* That's it. */
unlock_super(sb);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e993..c8ff0d1ae5d 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
*/
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
+ down_read(&c->vfs_sb->s_umount);
writeback_inodes_sb(c->vfs_sb);
+ up_read(&c->vfs_sb->s_umount);
}
/**
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ad7f67b827e..0084a33c4c6 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1457,13 +1457,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = ubifs_get_pnode(c, nnode, iip);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
iip = (i & (UBIFS_LPT_FANOUT - 1));
dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
pnode->lprops[iip].free, pnode->lprops[iip].dirty,
@@ -1586,7 +1586,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
nnode = c->nroot;
nnode = dirty_cow_nnode(c, nnode);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
i = lnum - c->main_first;
shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
for (h = 1; h < c->lpt_hght; h++) {
@@ -1594,19 +1594,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
nnode = dirty_cow_nnode(c, nnode);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = ubifs_get_pnode(c, nnode, iip);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
pnode = dirty_cow_pnode(c, pnode);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
iip = (i & (UBIFS_LPT_FANOUT - 1));
dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
pnode->lprops[iip].free, pnode->lprops[iip].dirty,
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 13cb7a4237b..d12535b7fc7 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -646,7 +646,7 @@ static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
return ubifs_get_pnode(c, nnode, iip);
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 109c6ea03bb..daae9e1f538 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -24,7 +24,7 @@
* This file implements functions needed to recover from unclean un-mounts.
* When UBIFS is mounted, it checks a flag on the master node to determine if
* an un-mount was completed successfully. If not, the process of mounting
- * incorparates additional checking and fixing of on-flash data structures.
+ * incorporates additional checking and fixing of on-flash data structures.
* UBIFS always cleans away all remnants of an unclean un-mount, so that
* errors do not accumulate. However UBIFS defers recovery if it is mounted
* read-only, and the flash is not modified in that case.
@@ -1063,8 +1063,21 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
}
err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
if (err) {
- if (err == -ENOSPC)
- dbg_err("could not find a dirty LEB");
+ /*
+ * There are no dirty or empty LEBs subject to here being
+ * enough for the index. Try to use
+ * 'ubifs_find_free_leb_for_idx()', which will return any empty
+ * LEBs (ignoring index requirements). If the index then
+ * doesn't have enough LEBs the recovery commit will fail -
+ * which is the same result anyway i.e. recovery fails. So
+ * there is no problem ignoring index requirements and just
+ * grabbing a free LEB since we have already established there
+ * is not a dirty LEB we could have used instead.
+ */
+ if (err == -ENOSPC) {
+ dbg_rcvry("could not find a dirty LEB");
+ goto find_free;
+ }
return err;
}
ubifs_assert(!(lp.flags & LPROPS_INDEX));
@@ -1139,8 +1152,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
find_free:
/*
* There is no GC head LEB or the free space in the GC head LEB is too
- * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
- * GC is not run.
+ * small, or there are not dirty LEBs. Allocate gc_lnum by calling
+ * 'ubifs_find_free_leb_for_idx()' so GC is not run.
*/
lnum = ubifs_find_free_leb_for_idx(c);
if (lnum < 0) {
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefc..0b201114a5a 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
return 0;
}
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
int freed, contention = 0;
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 4d2f2157dd3..5fc5a098897 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1307,6 +1307,8 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_orphans;
err = ubifs_rcvry_gc_commit(c);
+ if (err)
+ goto out_orphans;
} else {
err = take_gc_lnum(c);
if (err)
@@ -1318,7 +1320,7 @@ static int mount_ubifs(struct ubifs_info *c)
*/
err = ubifs_leb_unmap(c, c->gc_lnum);
if (err)
- return err;
+ goto out_orphans;
}
err = dbg_check_lprops(c);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c..04310878f44 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int ubifs_tnc_end_commit(struct ubifs_info *c);
/* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
/* commit.c */
int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c8fb13f83b3..0dce969d6ca 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -87,11 +87,9 @@ xfs-y += xfs_alloc.o \
xfs_trans_buf.o \
xfs_trans_extfree.o \
xfs_trans_inode.o \
- xfs_trans_item.o \
xfs_utils.o \
xfs_vnodeops.o \
- xfs_rw.o \
- xfs_dmops.o
+ xfs_rw.o
xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 9f769b5b38f..b2771862fd3 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -225,7 +225,7 @@ xfs_check_acl(struct inode *inode, int mask)
struct posix_acl *acl;
int error = -EAGAIN;
- xfs_itrace_entry(ip);
+ trace_xfs_check_acl(ip);
/*
* If there is no attribute fork no ACL exists on this inode and
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdc..d24e78f32f3 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -21,19 +21,12 @@
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_trans.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
-#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_iomap.h"
@@ -92,18 +85,15 @@ void
xfs_count_page_state(
struct page *page,
int *delalloc,
- int *unmapped,
int *unwritten)
{
struct buffer_head *bh, *head;
- *delalloc = *unmapped = *unwritten = 0;
+ *delalloc = *unwritten = 0;
bh = head = page_buffers(page);
do {
- if (buffer_uptodate(bh) && !buffer_mapped(bh))
- (*unmapped) = 1;
- else if (buffer_unwritten(bh))
+ if (buffer_unwritten(bh))
(*unwritten) = 1;
else if (buffer_delay(bh))
(*delalloc) = 1;
@@ -212,23 +202,17 @@ xfs_setfilesize(
}
/*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
+ * Schedule IO completion handling on the final put of an ioend.
*/
STATIC void
xfs_finish_ioend(
- xfs_ioend_t *ioend,
- int wait)
+ struct xfs_ioend *ioend)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
- struct workqueue_struct *wq;
-
- wq = (ioend->io_type == IO_UNWRITTEN) ?
- xfsconvertd_workqueue : xfsdatad_workqueue;
- queue_work(wq, &ioend->io_work);
- if (wait)
- flush_workqueue(wq);
+ if (ioend->io_type == IO_UNWRITTEN)
+ queue_work(xfsconvertd_workqueue, &ioend->io_work);
+ else
+ queue_work(xfsdatad_workqueue, &ioend->io_work);
}
}
@@ -272,11 +256,25 @@ xfs_end_io(
*/
if (error == EAGAIN) {
atomic_inc(&ioend->io_remaining);
- xfs_finish_ioend(ioend, 0);
+ xfs_finish_ioend(ioend);
/* ensure we don't spin on blocked ioends */
delay(1);
- } else
+ } else {
+ if (ioend->io_iocb)
+ aio_complete(ioend->io_iocb, ioend->io_result, 0);
xfs_destroy_ioend(ioend);
+ }
+}
+
+/*
+ * Call IO completion handling in caller context on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend_sync(
+ struct xfs_ioend *ioend)
+{
+ if (atomic_dec_and_test(&ioend->io_remaining))
+ xfs_end_io(&ioend->io_work);
}
/*
@@ -309,6 +307,8 @@ xfs_alloc_ioend(
atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
+ ioend->io_iocb = NULL;
+ ioend->io_result = 0;
INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend;
@@ -358,7 +358,7 @@ xfs_end_bio(
bio->bi_end_io = NULL;
bio_put(bio);
- xfs_finish_ioend(ioend, 0);
+ xfs_finish_ioend(ioend);
}
STATIC void
@@ -500,7 +500,7 @@ xfs_submit_ioend(
}
if (bio)
xfs_submit_ioend_bio(wbc, ioend, bio);
- xfs_finish_ioend(ioend, 0);
+ xfs_finish_ioend(ioend);
} while ((ioend = next) != NULL);
}
@@ -614,31 +614,30 @@ xfs_map_at_offset(
STATIC unsigned int
xfs_probe_page(
struct page *page,
- unsigned int pg_offset,
- int mapped)
+ unsigned int pg_offset)
{
+ struct buffer_head *bh, *head;
int ret = 0;
if (PageWriteback(page))
return 0;
+ if (!PageDirty(page))
+ return 0;
+ if (!page->mapping)
+ return 0;
+ if (!page_has_buffers(page))
+ return 0;
- if (page->mapping && PageDirty(page)) {
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
-
- bh = head = page_buffers(page);
- do {
- if (!buffer_uptodate(bh))
- break;
- if (mapped != buffer_mapped(bh))
- break;
- ret += bh->b_size;
- if (ret >= pg_offset)
- break;
- } while ((bh = bh->b_this_page) != head);
- } else
- ret = mapped ? 0 : PAGE_CACHE_SIZE;
- }
+ bh = head = page_buffers(page);
+ do {
+ if (!buffer_uptodate(bh))
+ break;
+ if (!buffer_mapped(bh))
+ break;
+ ret += bh->b_size;
+ if (ret >= pg_offset)
+ break;
+ } while ((bh = bh->b_this_page) != head);
return ret;
}
@@ -648,8 +647,7 @@ xfs_probe_cluster(
struct inode *inode,
struct page *startpage,
struct buffer_head *bh,
- struct buffer_head *head,
- int mapped)
+ struct buffer_head *head)
{
struct pagevec pvec;
pgoff_t tindex, tlast, tloff;
@@ -658,7 +656,7 @@ xfs_probe_cluster(
/* First sum forwards in this page */
do {
- if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
+ if (!buffer_uptodate(bh) || !buffer_mapped(bh))
return total;
total += bh->b_size;
} while ((bh = bh->b_this_page) != head);
@@ -692,7 +690,7 @@ xfs_probe_cluster(
pg_offset = PAGE_CACHE_SIZE;
if (page->index == tindex && trylock_page(page)) {
- pg_len = xfs_probe_page(page, pg_offset, mapped);
+ pg_len = xfs_probe_page(page, pg_offset);
unlock_page(page);
}
@@ -761,7 +759,6 @@ xfs_convert_page(
struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
struct writeback_control *wbc,
- int startio,
int all_bh)
{
struct buffer_head *bh, *head;
@@ -832,19 +829,14 @@ xfs_convert_page(
ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
xfs_map_at_offset(inode, bh, imap, offset);
- if (startio) {
- xfs_add_to_ioend(inode, bh, offset,
- type, ioendp, done);
- } else {
- set_buffer_dirty(bh);
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- }
+ xfs_add_to_ioend(inode, bh, offset, type,
+ ioendp, done);
+
page_dirty--;
count++;
} else {
type = IO_NEW;
- if (buffer_mapped(bh) && all_bh && startio) {
+ if (buffer_mapped(bh) && all_bh) {
lock_buffer(bh);
xfs_add_to_ioend(inode, bh, offset,
type, ioendp, done);
@@ -859,14 +851,12 @@ xfs_convert_page(
if (uptodate && bh == head)
SetPageUptodate(page);
- if (startio) {
- if (count) {
- wbc->nr_to_write--;
- if (wbc->nr_to_write <= 0)
- done = 1;
- }
- xfs_start_page_writeback(page, !page_dirty, count);
+ if (count) {
+ wbc->nr_to_write--;
+ if (wbc->nr_to_write <= 0)
+ done = 1;
}
+ xfs_start_page_writeback(page, !page_dirty, count);
return done;
fail_unlock_page:
@@ -886,7 +876,6 @@ xfs_cluster_write(
struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
struct writeback_control *wbc,
- int startio,
int all_bh,
pgoff_t tlast)
{
@@ -902,7 +891,7 @@ xfs_cluster_write(
for (i = 0; i < pagevec_count(&pvec); i++) {
done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- imap, ioendp, wbc, startio, all_bh);
+ imap, ioendp, wbc, all_bh);
if (done)
break;
}
@@ -981,7 +970,7 @@ xfs_aops_discard_page(
*/
error = xfs_bmapi(NULL, ip, offset_fsb, 1,
XFS_BMAPI_ENTIRE, NULL, 0, &imap,
- &nimaps, NULL, NULL);
+ &nimaps, NULL);
if (error) {
/* something screwed, just bail */
@@ -1009,7 +998,7 @@ xfs_aops_discard_page(
*/
xfs_bmap_init(&flist, &firstblock);
error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
- &flist, NULL, &done);
+ &flist, &done);
ASSERT(!flist.xbf_count && !flist.xbf_first);
if (error) {
@@ -1032,50 +1021,66 @@ out_invalidate:
}
/*
- * Calling this without startio set means we are being asked to make a dirty
- * page ready for freeing it's buffers. When called with startio set then
- * we are coming from writepage.
+ * Write out a dirty page.
+ *
+ * For delalloc space on the page we need to allocate space and flush it.
+ * For unwritten space on the page we need to start the conversion to
+ * regular allocated space.
+ * For any other dirty buffer heads on the page we should flush them.
*
- * When called with startio set it is important that we write the WHOLE
- * page if possible.
- * The bh->b_state's cannot know if any of the blocks or which block for
- * that matter are dirty due to mmap writes, and therefore bh uptodate is
- * only valid if the page itself isn't completely uptodate. Some layers
- * may clear the page dirty flag prior to calling write page, under the
- * assumption the entire page will be written out; by not writing out the
- * whole page the page can be reused before all valid dirty data is
- * written out. Note: in the case of a page that has been dirty'd by
- * mapwrite and but partially setup by block_prepare_write the
- * bh->b_states's will not agree and only ones setup by BPW/BCW will have
- * valid state, thus the whole page must be written out thing.
+ * If we detect that a transaction would be required to flush the page, we
+ * have to check the process flags first, if we are already in a transaction
+ * or disk I/O during allocations is off, we need to fail the writepage and
+ * redirty the page.
*/
-
STATIC int
-xfs_page_state_convert(
- struct inode *inode,
- struct page *page,
- struct writeback_control *wbc,
- int startio,
- int unmapped) /* also implies page uptodate */
+xfs_vm_writepage(
+ struct page *page,
+ struct writeback_control *wbc)
{
+ struct inode *inode = page->mapping->host;
+ int delalloc, unwritten;
struct buffer_head *bh, *head;
struct xfs_bmbt_irec imap;
xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset;
- unsigned long p_offset = 0;
unsigned int type;
__uint64_t end_offset;
pgoff_t end_index, last_index;
ssize_t size, len;
int flags, err, imap_valid = 0, uptodate = 1;
- int page_dirty, count = 0;
- int trylock = 0;
- int all_bh = unmapped;
+ int count = 0;
+ int all_bh = 0;
- if (startio) {
- if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
- trylock |= BMAPI_TRYLOCK;
- }
+ trace_xfs_writepage(inode, page, 0);
+
+ ASSERT(page_has_buffers(page));
+
+ /*
+ * Refuse to write the page out if we are called from reclaim context.
+ *
+ * This avoids stack overflows when called from deeply used stacks in
+ * random callers for direct reclaim or memcg reclaim. We explicitly
+ * allow reclaim from kswapd as the stack usage there is relatively low.
+ *
+ * This should really be done by the core VM, but until that happens
+ * filesystems like XFS, btrfs and ext4 have to take care of this
+ * by themselves.
+ */
+ if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
+ goto out_fail;
+
+ /*
+ * We need a transaction if there are delalloc or unwritten buffers
+ * on the page.
+ *
+ * If we need a transaction and the process flags say we are already
+ * in a transaction, or no IO is allowed then mark the page dirty
+ * again and leave the page as is.
+ */
+ xfs_count_page_state(page, &delalloc, &unwritten);
+ if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
+ goto out_fail;
/* Is this page beyond the end of the file? */
offset = i_size_read(inode);
@@ -1084,50 +1089,33 @@ xfs_page_state_convert(
if (page->index >= end_index) {
if ((page->index >= end_index + 1) ||
!(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
- if (startio)
- unlock_page(page);
+ unlock_page(page);
return 0;
}
}
- /*
- * page_dirty is initially a count of buffers on the page before
- * EOF and is decremented as we move each into a cleanable state.
- *
- * Derivation:
- *
- * End offset is the highest offset that this page should represent.
- * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
- * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
- * hence give us the correct page_dirty count. On any other page,
- * it will be zero and in that case we need page_dirty to be the
- * count of buffers on the page.
- */
end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+ offset);
len = 1 << inode->i_blkbits;
- p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
- PAGE_CACHE_SIZE);
- p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
- page_dirty = p_offset / len;
bh = head = page_buffers(page);
offset = page_offset(page);
flags = BMAPI_READ;
type = IO_NEW;
- /* TODO: cleanup count and page_dirty */
-
do {
if (offset >= end_offset)
break;
if (!buffer_uptodate(bh))
uptodate = 0;
- if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
- /*
- * the iomap is actually still valid, but the ioend
- * isn't. shouldn't happen too often.
- */
+
+ /*
+ * A hole may still be marked uptodate because discard_buffer
+ * leaves the flag set.
+ */
+ if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+ ASSERT(!buffer_dirty(bh));
imap_valid = 0;
continue;
}
@@ -1135,19 +1123,7 @@ xfs_page_state_convert(
if (imap_valid)
imap_valid = xfs_imap_valid(inode, &imap, offset);
- /*
- * First case, map an unwritten extent and prepare for
- * extent state conversion transaction on completion.
- *
- * Second case, allocate space for a delalloc buffer.
- * We can return EAGAIN here in the release page case.
- *
- * Third case, an unmapped buffer was found, and we are
- * in a path where we need to write the whole page out.
- */
- if (buffer_unwritten(bh) || buffer_delay(bh) ||
- ((buffer_uptodate(bh) || PageUptodate(page)) &&
- !buffer_mapped(bh) && (unmapped || startio))) {
+ if (buffer_unwritten(bh) || buffer_delay(bh)) {
int new_ioend = 0;
/*
@@ -1161,15 +1137,16 @@ xfs_page_state_convert(
flags = BMAPI_WRITE | BMAPI_IGNSTATE;
} else if (buffer_delay(bh)) {
type = IO_DELAY;
- flags = BMAPI_ALLOCATE | trylock;
- } else {
- type = IO_NEW;
- flags = BMAPI_WRITE | BMAPI_MMAP;
+ flags = BMAPI_ALLOCATE;
+
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ wbc->nonblocking)
+ flags |= BMAPI_TRYLOCK;
}
if (!imap_valid) {
/*
- * if we didn't have a valid mapping then we
+ * If we didn't have a valid mapping then we
* need to ensure that we put the new mapping
* in a new ioend structure. This needs to be
* done to ensure that the ioends correctly
@@ -1177,14 +1154,7 @@ xfs_page_state_convert(
* for unwritten extent conversion.
*/
new_ioend = 1;
- if (type == IO_NEW) {
- size = xfs_probe_cluster(inode,
- page, bh, head, 0);
- } else {
- size = len;
- }
-
- err = xfs_map_blocks(inode, offset, size,
+ err = xfs_map_blocks(inode, offset, len,
&imap, flags);
if (err)
goto error;
@@ -1193,19 +1163,11 @@ xfs_page_state_convert(
}
if (imap_valid) {
xfs_map_at_offset(inode, bh, &imap, offset);
- if (startio) {
- xfs_add_to_ioend(inode, bh, offset,
- type, &ioend,
- new_ioend);
- } else {
- set_buffer_dirty(bh);
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- }
- page_dirty--;
+ xfs_add_to_ioend(inode, bh, offset, type,
+ &ioend, new_ioend);
count++;
}
- } else if (buffer_uptodate(bh) && startio) {
+ } else if (buffer_uptodate(bh)) {
/*
* we got here because the buffer is already mapped.
* That means it must already have extents allocated
@@ -1213,8 +1175,7 @@ xfs_page_state_convert(
*/
if (!imap_valid || flags != BMAPI_READ) {
flags = BMAPI_READ;
- size = xfs_probe_cluster(inode, page, bh,
- head, 1);
+ size = xfs_probe_cluster(inode, page, bh, head);
err = xfs_map_blocks(inode, offset, size,
&imap, flags);
if (err)
@@ -1233,18 +1194,16 @@ xfs_page_state_convert(
*/
type = IO_NEW;
if (trylock_buffer(bh)) {
- ASSERT(buffer_mapped(bh));
if (imap_valid)
all_bh = 1;
xfs_add_to_ioend(inode, bh, offset, type,
&ioend, !imap_valid);
- page_dirty--;
count++;
} else {
imap_valid = 0;
}
- } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
- (unmapped || startio)) {
+ } else if (PageUptodate(page)) {
+ ASSERT(buffer_mapped(bh));
imap_valid = 0;
}
@@ -1256,8 +1215,7 @@ xfs_page_state_convert(
if (uptodate && bh == head)
SetPageUptodate(page);
- if (startio)
- xfs_start_page_writeback(page, 1, count);
+ xfs_start_page_writeback(page, 1, count);
if (ioend && imap_valid) {
xfs_off_t end_index;
@@ -1275,131 +1233,27 @@ xfs_page_state_convert(
end_index = last_index;
xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
- wbc, startio, all_bh, end_index);
+ wbc, all_bh, end_index);
}
if (iohead)
xfs_submit_ioend(wbc, iohead);
- return page_dirty;
+ return 0;
error:
if (iohead)
xfs_cancel_ioend(iohead);
- /*
- * If it's delalloc and we have nowhere to put it,
- * throw it away, unless the lower layers told
- * us to try again.
- */
- if (err != -EAGAIN) {
- if (!unmapped)
- xfs_aops_discard_page(page);
- ClearPageUptodate(page);
- }
+ xfs_aops_discard_page(page);
+ ClearPageUptodate(page);
+ unlock_page(page);
return err;
-}
-
-/*
- * writepage: Called from one of two places:
- *
- * 1. we are flushing a delalloc buffer head.
- *
- * 2. we are writing out a dirty page. Typically the page dirty
- * state is cleared before we get here. In this case is it
- * conceivable we have no buffer heads.
- *
- * For delalloc space on the page we need to allocate space and
- * flush it. For unmapped buffer heads on the page we should
- * allocate space if the page is uptodate. For any other dirty
- * buffer heads on the page we should flush them.
- *
- * If we detect that a transaction would be required to flush
- * the page, we have to check the process flags first, if we
- * are already in a transaction or disk I/O during allocations
- * is off, we need to fail the writepage and redirty the page.
- */
-
-STATIC int
-xfs_vm_writepage(
- struct page *page,
- struct writeback_control *wbc)
-{
- int error;
- int need_trans;
- int delalloc, unmapped, unwritten;
- struct inode *inode = page->mapping->host;
-
- trace_xfs_writepage(inode, page, 0);
-
- /*
- * Refuse to write the page out if we are called from reclaim context.
- *
- * This is primarily to avoid stack overflows when called from deep
- * used stacks in random callers for direct reclaim, but disabling
- * reclaim for kswap is a nice side-effect as kswapd causes rather
- * suboptimal I/O patters, too.
- *
- * This should really be done by the core VM, but until that happens
- * filesystems like XFS, btrfs and ext4 have to take care of this
- * by themselves.
- */
- if (current->flags & PF_MEMALLOC)
- goto out_fail;
-
- /*
- * We need a transaction if:
- * 1. There are delalloc buffers on the page
- * 2. The page is uptodate and we have unmapped buffers
- * 3. The page is uptodate and we have no buffers
- * 4. There are unwritten buffers on the page
- */
-
- if (!page_has_buffers(page)) {
- unmapped = 1;
- need_trans = 1;
- } else {
- xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
- if (!PageUptodate(page))
- unmapped = 0;
- need_trans = delalloc + unmapped + unwritten;
- }
-
- /*
- * If we need a transaction and the process flags say
- * we are already in a transaction, or no IO is allowed
- * then mark the page dirty again and leave the page
- * as is.
- */
- if (current_test_flags(PF_FSTRANS) && need_trans)
- goto out_fail;
-
- /*
- * Delay hooking up buffer heads until we have
- * made our go/no-go decision.
- */
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
- /*
- * Convert delayed allocate, unwritten or unmapped space
- * to real space and flush out to disk.
- */
- error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
- if (error == -EAGAIN)
- goto out_fail;
- if (unlikely(error < 0))
- goto out_unlock;
-
- return 0;
out_fail:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
-out_unlock:
- unlock_page(page);
- return error;
}
STATIC int
@@ -1413,65 +1267,27 @@ xfs_vm_writepages(
/*
* Called to move a page into cleanable state - and from there
- * to be released. Possibly the page is already clean. We always
+ * to be released. The page should already be clean. We always
* have buffer heads in this call.
*
- * Returns 0 if the page is ok to release, 1 otherwise.
- *
- * Possible scenarios are:
- *
- * 1. We are being called to release a page which has been written
- * to via regular I/O. buffer heads will be dirty and possibly
- * delalloc. If no delalloc buffer heads in this case then we
- * can just return zero.
- *
- * 2. We are called to release a page which has been written via
- * mmap, all we need to do is ensure there is no delalloc
- * state in the buffer heads, if not we can let the caller
- * free them and we should come back later via writepage.
+ * Returns 1 if the page is ok to release, 0 otherwise.
*/
STATIC int
xfs_vm_releasepage(
struct page *page,
gfp_t gfp_mask)
{
- struct inode *inode = page->mapping->host;
- int dirty, delalloc, unmapped, unwritten;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 1,
- };
+ int delalloc, unwritten;
- trace_xfs_releasepage(inode, page, 0);
-
- if (!page_has_buffers(page))
- return 0;
+ trace_xfs_releasepage(page->mapping->host, page, 0);
- xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
- if (!delalloc && !unwritten)
- goto free_buffers;
+ xfs_count_page_state(page, &delalloc, &unwritten);
- if (!(gfp_mask & __GFP_FS))
+ if (WARN_ON(delalloc))
return 0;
-
- /* If we are already inside a transaction or the thread cannot
- * do I/O, we cannot release this page.
- */
- if (current_test_flags(PF_FSTRANS))
+ if (WARN_ON(unwritten))
return 0;
- /*
- * Convert delalloc space to real space, do not flush the
- * data out to disk, that will be done by the caller.
- * Never need to allocate space here - we will always
- * come back to writepage in that case.
- */
- dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
- if (dirty == 0 && !unwritten)
- goto free_buffers;
- return 0;
-
-free_buffers:
return try_to_free_buffers(page);
}
@@ -1481,9 +1297,9 @@ __xfs_get_blocks(
sector_t iblock,
struct buffer_head *bh_result,
int create,
- int direct,
- bmapi_flags_t flags)
+ int direct)
{
+ int flags = create ? BMAPI_WRITE : BMAPI_READ;
struct xfs_bmbt_irec imap;
xfs_off_t offset;
ssize_t size;
@@ -1498,8 +1314,11 @@ __xfs_get_blocks(
if (!create && direct && offset >= i_size_read(inode))
return 0;
- error = xfs_iomap(XFS_I(inode), offset, size,
- create ? flags : BMAPI_READ, &imap, &nimap, &new);
+ if (direct && create)
+ flags |= BMAPI_DIRECT;
+
+ error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
+ &new);
if (error)
return -error;
if (nimap == 0)
@@ -1579,8 +1398,7 @@ xfs_get_blocks(
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock,
- bh_result, create, 0, BMAPI_WRITE);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
}
STATIC int
@@ -1590,61 +1408,59 @@ xfs_get_blocks_direct(
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock,
- bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
}
+/*
+ * Complete a direct I/O write request.
+ *
+ * If the private argument is non-NULL __xfs_get_blocks signals us that we
+ * need to issue a transaction to convert the range from unwritten to written
+ * extents. In case this is regular synchronous I/O we just call xfs_end_io
+ * to do this and we are done. But in case this was a successfull AIO
+ * request this handler is called from interrupt context, from which we
+ * can't start transactions. In that case offload the I/O completion to
+ * the workqueues we also use for buffered I/O completion.
+ */
STATIC void
-xfs_end_io_direct(
- struct kiocb *iocb,
- loff_t offset,
- ssize_t size,
- void *private)
+xfs_end_io_direct_write(
+ struct kiocb *iocb,
+ loff_t offset,
+ ssize_t size,
+ void *private,
+ int ret,
+ bool is_async)
{
- xfs_ioend_t *ioend = iocb->private;
+ struct xfs_ioend *ioend = iocb->private;
/*
- * Non-NULL private data means we need to issue a transaction to
- * convert a range from unwritten to written extents. This needs
- * to happen from process context but aio+dio I/O completion
- * happens from irq context so we need to defer it to a workqueue.
- * This is not necessary for synchronous direct I/O, but we do
- * it anyway to keep the code uniform and simpler.
- *
- * Well, if only it were that simple. Because synchronous direct I/O
- * requires extent conversion to occur *before* we return to userspace,
- * we have to wait for extent conversion to complete. Look at the
- * iocb that has been passed to us to determine if this is AIO or
- * not. If it is synchronous, tell xfs_finish_ioend() to kick the
- * workqueue and wait for it to complete.
- *
- * The core direct I/O code might be changed to always call the
- * completion handler in the future, in which case all this can
- * go away.
+ * blockdev_direct_IO can return an error even after the I/O
+ * completion handler was called. Thus we need to protect
+ * against double-freeing.
*/
+ iocb->private = NULL;
+
ioend->io_offset = offset;
ioend->io_size = size;
- if (ioend->io_type == IO_READ) {
- xfs_finish_ioend(ioend, 0);
- } else if (private && size > 0) {
- xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
- } else {
+ if (private && size > 0)
+ ioend->io_type = IO_UNWRITTEN;
+
+ if (is_async) {
/*
- * A direct I/O write ioend starts it's life in unwritten
- * state in case they map an unwritten extent. This write
- * didn't map an unwritten extent so switch it's completion
- * handler.
+ * If we are converting an unwritten extent we need to delay
+ * the AIO completion until after the unwrittent extent
+ * conversion has completed, otherwise do it ASAP.
*/
- ioend->io_type = IO_NEW;
- xfs_finish_ioend(ioend, 0);
+ if (ioend->io_type == IO_UNWRITTEN) {
+ ioend->io_iocb = iocb;
+ ioend->io_result = ret;
+ } else {
+ aio_complete(iocb, ret, 0);
+ }
+ xfs_finish_ioend(ioend);
+ } else {
+ xfs_finish_ioend_sync(ioend);
}
-
- /*
- * blockdev_direct_IO can return an error even after the I/O
- * completion handler was called. Thus we need to protect
- * against double-freeing.
- */
- iocb->private = NULL;
}
STATIC ssize_t
@@ -1655,23 +1471,26 @@ xfs_vm_direct_IO(
loff_t offset,
unsigned long nr_segs)
{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct block_device *bdev;
- ssize_t ret;
-
- bdev = xfs_find_bdev_for_inode(inode);
-
- iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
- IO_UNWRITTEN : IO_READ);
-
- ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
- offset, nr_segs,
- xfs_get_blocks_direct,
- xfs_end_io_direct);
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct block_device *bdev = xfs_find_bdev_for_inode(inode);
+ ssize_t ret;
+
+ if (rw & WRITE) {
+ iocb->private = xfs_alloc_ioend(inode, IO_NEW);
+
+ ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+ offset, nr_segs,
+ xfs_get_blocks_direct,
+ xfs_end_io_direct_write);
+ if (ret != -EIOCBQUEUED && iocb->private)
+ xfs_destroy_ioend(iocb->private);
+ } else {
+ ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+ offset, nr_segs,
+ xfs_get_blocks_direct,
+ NULL);
+ }
- if (unlikely(ret != -EIOCBQUEUED && iocb->private))
- xfs_destroy_ioend(iocb->private);
return ret;
}
@@ -1686,8 +1505,8 @@ xfs_vm_write_begin(
void **fsdata)
{
*pagep = NULL;
- return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- xfs_get_blocks);
+ return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS,
+ pagep, fsdata, xfs_get_blocks);
}
STATIC sector_t
@@ -1698,7 +1517,7 @@ xfs_vm_bmap(
struct inode *inode = (struct inode *)mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- xfs_itrace_entry(XFS_I(inode));
+ trace_xfs_vm_bmap(XFS_I(inode));
xfs_ilock(ip, XFS_IOLOCK_SHARED);
xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4cfc6ea87df..c5057fb6237 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */
+ struct kiocb *io_iocb;
+ int io_result;
} xfs_ioend_t;
extern const struct address_space_operations xfs_address_space_operations;
@@ -45,6 +47,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
extern void xfs_ioend_init(void);
extern void xfs_ioend_wait(struct xfs_inode *);
-extern void xfs_count_page_state(struct page *, int *, int *, int *);
+extern void xfs_count_page_state(struct page *, int *, int *);
#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef59..ea79072f521 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -39,13 +39,12 @@
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_ag.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(int, gfp_t);
+STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
static struct shrinker xfs_buf_shake = {
.shrink = xfsbufd_wakeup,
@@ -340,7 +339,7 @@ _xfs_buf_lookup_pages(
__func__, gfp_mask);
XFS_STATS_INC(xb_page_retries);
- xfsbufd_wakeup(0, gfp_mask);
+ xfsbufd_wakeup(NULL, 0, gfp_mask);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
@@ -579,9 +578,9 @@ _xfs_buf_read(
XBF_READ_AHEAD | _XBF_RUN_QUEUES);
status = xfs_buf_iorequest(bp);
- if (!status && !(flags & XBF_ASYNC))
- status = xfs_buf_iowait(bp);
- return status;
+ if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
+ return status;
+ return xfs_buf_iowait(bp);
}
xfs_buf_t *
@@ -897,36 +896,6 @@ xfs_buf_unlock(
trace_xfs_buf_unlock(bp, _RET_IP_);
}
-
-/*
- * Pinning Buffer Storage in Memory
- * Ensure that no attempt to force a buffer to disk will succeed.
- */
-void
-xfs_buf_pin(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_pin(bp, _RET_IP_);
- atomic_inc(&bp->b_pin_count);
-}
-
-void
-xfs_buf_unpin(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_unpin(bp, _RET_IP_);
-
- if (atomic_dec_and_test(&bp->b_pin_count))
- wake_up_all(&bp->b_waiters);
-}
-
-int
-xfs_buf_ispin(
- xfs_buf_t *bp)
-{
- return atomic_read(&bp->b_pin_count);
-}
-
STATIC void
xfs_buf_wait_unpin(
xfs_buf_t *bp)
@@ -1018,13 +987,12 @@ xfs_bwrite(
{
int error;
- bp->b_strat = xfs_bdstrat_cb;
bp->b_mount = mp;
bp->b_flags |= XBF_WRITE;
bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
xfs_buf_delwri_dequeue(bp);
- xfs_buf_iostrategy(bp);
+ xfs_bdstrat_cb(bp);
error = xfs_buf_iowait(bp);
if (error)
@@ -1040,7 +1008,6 @@ xfs_bdwrite(
{
trace_xfs_buf_bdwrite(bp, _RET_IP_);
- bp->b_strat = xfs_bdstrat_cb;
bp->b_mount = mp;
bp->b_flags &= ~XBF_READ;
@@ -1075,7 +1042,6 @@ xfs_bioerror(
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
- XFS_BUF_CLR_BDSTRAT_FUNC(bp);
xfs_biodone(bp);
return EIO;
@@ -1105,7 +1071,6 @@ xfs_bioerror_relse(
XFS_BUF_DONE(bp);
XFS_BUF_STALE(bp);
XFS_BUF_CLR_IODONE_FUNC(bp);
- XFS_BUF_CLR_BDSTRAT_FUNC(bp);
if (!(fl & XBF_ASYNC)) {
/*
* Mark b_error and B_ERROR _both_.
@@ -1311,8 +1276,19 @@ submit_io:
if (size)
goto next_chunk;
} else {
- bio_put(bio);
+ /*
+ * if we get here, no pages were added to the bio. However,
+ * we can't just error out here - if the pages are locked then
+ * we have to unlock them otherwise we can hang on a later
+ * access to the page.
+ */
xfs_buf_ioerror(bp, EIO);
+ if (bp->b_flags & _XBF_PAGE_LOCKED) {
+ int i;
+ for (i = 0; i < bp->b_page_count; i++)
+ unlock_page(bp->b_pages[i]);
+ }
+ bio_put(bio);
}
}
@@ -1762,6 +1738,7 @@ xfs_buf_runall_queues(
STATIC int
xfsbufd_wakeup(
+ struct shrinker *shrink,
int priority,
gfp_t mask)
{
@@ -1803,7 +1780,7 @@ xfs_buf_delwri_split(
trace_xfs_buf_delwri_split(bp, _RET_IP_);
ASSERT(bp->b_flags & XBF_DELWRI);
- if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
+ if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
if (!force &&
time_before(jiffies, bp->b_queuetime + age)) {
xfs_buf_unlock(bp);
@@ -1888,7 +1865,7 @@ xfsbufd(
struct xfs_buf *bp;
bp = list_first_entry(&tmp, struct xfs_buf, b_list);
list_del_init(&bp->b_list);
- xfs_buf_iostrategy(bp);
+ xfs_bdstrat_cb(bp);
count++;
}
if (count)
@@ -1935,7 +1912,7 @@ xfs_flush_buftarg(
bp->b_flags &= ~XBF_ASYNC;
list_add(&bp->b_list, &wait_list);
}
- xfs_buf_iostrategy(bp);
+ xfs_bdstrat_cb(bp);
}
if (wait) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 5fbecefa5df..d072e5ff923 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -44,57 +44,57 @@ typedef enum {
XBRW_ZERO = 3, /* Zero target memory */
} xfs_buf_rw_t;
-typedef enum {
- XBF_READ = (1 << 0), /* buffer intended for reading from device */
- XBF_WRITE = (1 << 1), /* buffer intended for writing to device */
- XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */
- XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
- XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */
- XBF_DELWRI = (1 << 6), /* buffer has dirty pages */
- XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
- XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
- XBF_ORDERED = (1 << 11), /* use ordered writes */
- XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
- XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
-
- /* flags used only as arguments to access routines */
- XBF_LOCK = (1 << 14), /* lock requested */
- XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */
- XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
-
- /* flags used only internally */
- _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
- _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
- _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
- _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
-
- /*
- * Special flag for supporting metadata blocks smaller than a FSB.
- *
- * In this case we can have multiple xfs_buf_t on a single page and
- * need to lock out concurrent xfs_buf_t readers as they only
- * serialise access to the buffer.
- *
- * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
- * between reads of the page. Hence we can have one thread read the
- * page and modify it, but then race with another thread that thinks
- * the page is not up-to-date and hence reads it again.
- *
- * The result is that the first modifcation to the page is lost.
- * This sort of AGF/AGI reading race can happen when unlinking inodes
- * that require truncation and results in the AGI unlinked list
- * modifications being lost.
- */
- _XBF_PAGE_LOCKED = (1 << 22),
-
- /*
- * If we try a barrier write, but it fails we have to communicate
- * this to the upper layers. Unfortunately b_error gets overwritten
- * when the buffer is re-issued so we have to add another flag to
- * keep this information.
- */
- _XFS_BARRIER_FAILED = (1 << 23),
-} xfs_buf_flags_t;
+#define XBF_READ (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
+#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */
+#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
+#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
+#define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */
+#define XBF_ORDERED (1 << 11)/* use ordered writes */
+#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
+#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
+
+/* flags used only as arguments to access routines */
+#define XBF_LOCK (1 << 14)/* lock requested */
+#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
+
+/* flags used only internally */
+#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
+#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
+#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
+#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
+
+/*
+ * Special flag for supporting metadata blocks smaller than a FSB.
+ *
+ * In this case we can have multiple xfs_buf_t on a single page and
+ * need to lock out concurrent xfs_buf_t readers as they only
+ * serialise access to the buffer.
+ *
+ * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
+ * between reads of the page. Hence we can have one thread read the
+ * page and modify it, but then race with another thread that thinks
+ * the page is not up-to-date and hence reads it again.
+ *
+ * The result is that the first modifcation to the page is lost.
+ * This sort of AGF/AGI reading race can happen when unlinking inodes
+ * that require truncation and results in the AGI unlinked list
+ * modifications being lost.
+ */
+#define _XBF_PAGE_LOCKED (1 << 22)
+
+/*
+ * If we try a barrier write, but it fails we have to communicate
+ * this to the upper layers. Unfortunately b_error gets overwritten
+ * when the buffer is re-issued so we have to add another flag to
+ * keep this information.
+ */
+#define _XFS_BARRIER_FAILED (1 << 23)
+
+typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \
{ XBF_READ, "READ" }, \
@@ -187,7 +187,6 @@ typedef struct xfs_buf {
atomic_t b_io_remaining; /* #outstanding I/O requests */
xfs_buf_iodone_t b_iodone; /* I/O completion function */
xfs_buf_relse_t b_relse; /* releasing function */
- xfs_buf_bdstrat_t b_strat; /* pre-write function */
struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
void *b_fspriv2;
@@ -245,11 +244,6 @@ extern int xfs_buf_iowait(xfs_buf_t *);
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
xfs_buf_rw_t);
-static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
-{
- return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
-}
-
static inline int xfs_buf_geterror(xfs_buf_t *bp)
{
return bp ? bp->b_error : ENOMEM;
@@ -258,11 +252,6 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
/* Buffer Utility Routines */
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-/* Pinning Buffer Storage in Memory */
-extern void xfs_buf_pin(xfs_buf_t *);
-extern void xfs_buf_unpin(xfs_buf_t *);
-extern int xfs_buf_ispin(xfs_buf_t *);
-
/* Delayed Write Buffer Routines */
extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
extern void xfs_buf_delwri_promote(xfs_buf_t *);
@@ -326,8 +315,6 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
-#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
-#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
@@ -351,7 +338,7 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
-#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp)
+#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
@@ -370,8 +357,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
xfs_buf_rele(bp);
}
-#define xfs_bpin(bp) xfs_buf_pin(bp)
-#define xfs_bunpin(bp) xfs_buf_unpin(bp)
#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
#define xfs_biomove(bp, off, len, data, rw) \
diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
deleted file mode 100644
index a8b0b1685ee..00000000000
--- a/fs/xfs/linux-2.6/xfs_dmapi_priv.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DMAPI_PRIV_H__
-#define __XFS_DMAPI_PRIV_H__
-
-/*
- * Based on IO_ISDIRECT, decide which i_ flag is set.
- */
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
- DM_FLAGS_IMUX : 0)
-#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
-
-#endif /*__XFS_DMAPI_PRIV_H__*/
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75aeb2a..3764d74790e 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -23,13 +23,13 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_export.h"
#include "xfs_vnodeops.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
+#include "xfs_trace.h"
/*
* Note that we only accept fileids which are long enough rather than allow
@@ -128,13 +128,11 @@ xfs_nfs_get_inode(
return ERR_PTR(-ESTALE);
/*
- * The XFS_IGET_BULKSTAT means that an invalid inode number is just
- * fine and not an indication of a corrupted filesystem. Because
- * clients can send any kind of invalid file handle, e.g. after
- * a restore on the server we have to deal with this case gracefully.
+ * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
+ * fine and not an indication of a corrupted filesystem as clients can
+ * send invalid file handles and we have to handle it gracefully..
*/
- error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT,
- XFS_ILOCK_SHARED, &ip, 0);
+ error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
if (error) {
/*
* EINVAL means the inode cluster doesn't exist anymore.
@@ -149,11 +147,10 @@ xfs_nfs_get_inode(
}
if (ip->i_d.di_gen != generation) {
- xfs_iput_new(ip, XFS_ILOCK_SHARED);
+ IRELE(ip);
return ERR_PTR(-ENOENT);
}
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
return VFS_I(ip);
}
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 257a56b127c..ba8ad422a16 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -22,23 +22,15 @@
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_trans.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_vnodeops.h"
#include "xfs_da_btree.h"
#include "xfs_ioctl.h"
@@ -108,7 +100,7 @@ xfs_file_fsync(
int error = 0;
int log_flushed = 0;
- xfs_itrace_entry(ip);
+ trace_xfs_file_fsync(ip);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -XFS_ERROR(EIO);
@@ -166,8 +158,7 @@ xfs_file_fsync(
* transaction. So we play it safe and fire off the
* transaction anyway.
*/
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, ip);
+ xfs_trans_ijoin(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp);
error = _xfs_trans_commit(tp, 0, &log_flushed);
@@ -275,20 +266,6 @@ xfs_file_aio_read(
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
- if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
- int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
- int iolock = XFS_IOLOCK_SHARED;
-
- ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
- dmflags, &iolock);
- if (ret) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- if (unlikely(ioflags & IO_ISDIRECT))
- mutex_unlock(&inode->i_mutex);
- return ret;
- }
- }
-
if (unlikely(ioflags & IO_ISDIRECT)) {
if (inode->i_mapping->nrpages) {
ret = -xfs_flushinval_pages(ip,
@@ -321,7 +298,6 @@ xfs_file_splice_read(
unsigned int flags)
{
struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
- struct xfs_mount *mp = ip->i_mount;
int ioflags = 0;
ssize_t ret;
@@ -335,18 +311,6 @@ xfs_file_splice_read(
xfs_ilock(ip, XFS_IOLOCK_SHARED);
- if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
- int iolock = XFS_IOLOCK_SHARED;
- int error;
-
- error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
- FILP_DELAY_FLAG(infilp), &iolock);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return -error;
- }
- }
-
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
@@ -367,7 +331,6 @@ xfs_file_splice_write(
{
struct inode *inode = outfilp->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
xfs_fsize_t isize, new_size;
int ioflags = 0;
ssize_t ret;
@@ -382,18 +345,6 @@ xfs_file_splice_write(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
- if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
- int iolock = XFS_IOLOCK_EXCL;
- int error;
-
- error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
- FILP_DELAY_FLAG(outfilp), &iolock);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return -error;
- }
- }
-
new_size = *ppos + count;
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -463,7 +414,7 @@ xfs_zero_last_block(
last_fsb = XFS_B_TO_FSBT(mp, isize);
nimaps = 1;
error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
- &nimaps, NULL, NULL);
+ &nimaps, NULL);
if (error) {
return error;
}
@@ -558,7 +509,7 @@ xfs_zero_eof(
nimaps = 1;
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
- 0, NULL, 0, &imap, &nimaps, NULL, NULL);
+ 0, NULL, 0, &imap, &nimaps, NULL);
if (error) {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
@@ -627,7 +578,6 @@ xfs_file_aio_write(
int ioflags = 0;
xfs_fsize_t isize, new_size;
int iolock;
- int eventsent = 0;
size_t ocount = 0, count;
int need_i_mutex;
@@ -673,33 +623,6 @@ start:
goto out_unlock_mutex;
}
- if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
- !(ioflags & IO_INVIS) && !eventsent)) {
- int dmflags = FILP_DELAY_FLAG(file);
-
- if (need_i_mutex)
- dmflags |= DM_FLAGS_IMUX;
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
- pos, count, dmflags, &iolock);
- if (error) {
- goto out_unlock_internal;
- }
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- eventsent = 1;
-
- /*
- * The iolock was dropped and reacquired in XFS_SEND_DATA
- * so we have to recheck the size when appending.
- * We will only "goto start;" once, since having sent the
- * event prevents another call to XFS_SEND_DATA, which is
- * what allows the size to change in the first place.
- */
- if ((file->f_flags & O_APPEND) && pos != ip->i_size)
- goto start;
- }
-
if (ioflags & IO_ISDIRECT) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
@@ -830,22 +753,6 @@ write_retry:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- if (ret == -ENOSPC &&
- DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
- xfs_iunlock(ip, iolock);
- if (need_i_mutex)
- mutex_unlock(&inode->i_mutex);
- error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
- DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
- 0, 0, 0); /* Delay flag intentionally unused */
- if (need_i_mutex)
- mutex_lock(&inode->i_mutex);
- xfs_ilock(ip, iolock);
- if (error)
- goto out_unlock_internal;
- goto start;
- }
-
error = -ret;
if (ret <= 0)
goto out_unlock_internal;
@@ -1014,9 +921,6 @@ const struct file_operations xfs_file_operations = {
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
-#ifdef HAVE_FOP_OPEN_EXEC
- .open_exec = xfs_file_open_exec,
-#endif
};
const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index b6918d76bc7..1f279b012f9 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -21,10 +21,6 @@
#include "xfs_inode.h"
#include "xfs_trace.h"
-int fs_noerr(void) { return 0; }
-int fs_nosys(void) { return ENOSYS; }
-void fs_noval(void) { return; }
-
/*
* note: all filemap functions return negative error codes. These
* need to be inverted before returning to the xfs core functions.
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
deleted file mode 100644
index 82bb19b2599..00000000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_FS_SUBR_H__
-#define __XFS_FS_SUBR_H__
-
-extern int fs_noerr(void);
-extern int fs_nosys(void);
-extern void fs_noval(void);
-
-#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 699b60cbab9..237f5ffb2ee 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -23,24 +23,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_ioctl.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_rtalloc.h"
#include "xfs_itable.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
@@ -679,10 +670,9 @@ xfs_ioc_bulkstat(
error = xfs_bulkstat_single(mp, &inlast,
bulkreq.ubuffer, &done);
else /* XFS_IOC_FSBULKSTAT */
- error = xfs_bulkstat(mp, &inlast, &count,
- (bulkstat_one_pf)xfs_bulkstat_one, NULL,
- sizeof(xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
+ error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
+ sizeof(xfs_bstat_t), bulkreq.ubuffer,
+ &done);
if (error)
return -error;
@@ -909,7 +899,7 @@ xfs_ioctl_setattr(
struct xfs_dquot *olddquot = NULL;
int code;
- xfs_itrace_entry(ip);
+ trace_xfs_ioctl_setattr(ip);
if (mp->m_flags & XFS_MOUNT_RDONLY)
return XFS_ERROR(EROFS);
@@ -1044,8 +1034,7 @@ xfs_ioctl_setattr(
}
}
- xfs_trans_ijoin(tp, ip, lock_flags);
- xfs_trans_ihold(tp, ip);
+ xfs_trans_ijoin(tp, ip);
/*
* Change file ownership. Must be the owner or privileged.
@@ -1117,16 +1106,7 @@ xfs_ioctl_setattr(
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
- if (code)
- return code;
-
- if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
- XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
- NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
- (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
- }
-
- return 0;
+ return code;
error_return:
xfs_qm_dqrele(udqp);
@@ -1302,7 +1282,7 @@ xfs_file_ioctl(
if (filp->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- xfs_itrace_entry(ip);
+ trace_xfs_file_ioctl(ip);
switch (cmd) {
case XFS_IOC_ALLOCSP:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 9287135e9bf..6c83f7f62dc 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -28,12 +28,8 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir2_sf.h"
#include "xfs_vnode.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
@@ -237,15 +233,12 @@ xfs_bulkstat_one_compat(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* buffer to place output in */
int ubsize, /* size of buffer */
- void *private_data, /* my private data */
- xfs_daddr_t bno, /* starting bno of inode cluster */
int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
int *stat) /* BULKSTAT_RV_... */
{
return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt_compat, bno,
- ubused, dibuff, stat);
+ xfs_bulkstat_one_fmt_compat,
+ ubused, stat);
}
/* copied from xfs_ioctl.c */
@@ -298,13 +291,11 @@ xfs_compat_ioc_bulkstat(
int res;
error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
- sizeof(compat_xfs_bstat_t),
- NULL, 0, NULL, NULL, &res);
+ sizeof(compat_xfs_bstat_t), 0, &res);
} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
error = xfs_bulkstat(mp, &inlast, &count,
- xfs_bulkstat_one_compat, NULL,
- sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
+ xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+ bulkreq.ubuffer, &done);
} else
error = XFS_ERROR(EINVAL);
if (error)
@@ -549,7 +540,7 @@ xfs_file_compat_ioctl(
if (filp->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- xfs_itrace_entry(ip);
+ trace_xfs_file_compat_ioctl(ip);
switch (cmd) {
/* No size or alignment issues on any arch */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 44f0b2de153..536b81e63a3 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -24,21 +24,13 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
@@ -496,7 +488,7 @@ xfs_vn_getattr(
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- xfs_itrace_entry(ip);
+ trace_xfs_getattr(ip);
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index facfb323a70..998a9d7fb9c 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -87,7 +87,6 @@
#include <xfs_aops.h>
#include <xfs_super.h>
#include <xfs_globals.h>
-#include <xfs_fs_subr.h>
#include <xfs_buf.h>
/*
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 067cafbfc63..bfd5ac9d1f6 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -16,7 +16,6 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_dmapi.h"
#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_log.h"
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f2d1718c916..758df94690e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -25,14 +25,11 @@
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
@@ -43,7 +40,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_fsops.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -94,7 +90,6 @@ mempool_t *xfs_ioend_pool;
#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
* unwritten extent conversion */
#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
-#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
@@ -116,9 +111,6 @@ mempool_t *xfs_ioend_pool;
#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
-#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */
-#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
-#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */
#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */
@@ -172,15 +164,13 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
STATIC int
xfs_parseargs(
struct xfs_mount *mp,
- char *options,
- char **mtpt)
+ char *options)
{
struct super_block *sb = mp->m_super;
char *this_char, *value, *eov;
int dsunit = 0;
int dswidth = 0;
int iosize = 0;
- int dmapi_implies_ikeep = 1;
__uint8_t iosizelog = 0;
/*
@@ -243,15 +233,10 @@ xfs_parseargs(
if (!mp->m_logname)
return ENOMEM;
} else if (!strcmp(this_char, MNTOPT_MTPT)) {
- if (!value || !*value) {
- cmn_err(CE_WARN,
- "XFS: %s option requires an argument",
- this_char);
- return EINVAL;
- }
- *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
- if (!*mtpt)
- return ENOMEM;
+ cmn_err(CE_WARN,
+ "XFS: %s option not allowed on this system",
+ this_char);
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -288,8 +273,6 @@ xfs_parseargs(
mp->m_flags &= ~XFS_MOUNT_GRPID;
} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
mp->m_flags |= XFS_MOUNT_WSYNC;
- } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
- mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
mp->m_flags |= XFS_MOUNT_NORECOVERY;
} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
@@ -329,7 +312,6 @@ xfs_parseargs(
} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
mp->m_flags |= XFS_MOUNT_IKEEP;
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
- dmapi_implies_ikeep = 0;
mp->m_flags &= ~XFS_MOUNT_IKEEP;
} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
@@ -370,12 +352,6 @@ xfs_parseargs(
} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
mp->m_qflags &= ~XFS_OQUOTA_ENFD;
- } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
- mp->m_flags |= XFS_MOUNT_DMAPI;
- } else if (!strcmp(this_char, MNTOPT_XDSM)) {
- mp->m_flags |= XFS_MOUNT_DMAPI;
- } else if (!strcmp(this_char, MNTOPT_DMI)) {
- mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
mp->m_flags |= XFS_MOUNT_DELAYLOG;
cmn_err(CE_WARN,
@@ -387,9 +363,11 @@ xfs_parseargs(
cmn_err(CE_WARN,
"XFS: ihashsize no longer used, option is deprecated.");
} else if (!strcmp(this_char, "osyncisdsync")) {
- /* no-op, this is now the default */
cmn_err(CE_WARN,
- "XFS: osyncisdsync is now the default, option is deprecated.");
+ "XFS: osyncisdsync has no effect, option is deprecated.");
+ } else if (!strcmp(this_char, "osyncisosync")) {
+ cmn_err(CE_WARN,
+ "XFS: osyncisosync has no effect, option is deprecated.");
} else if (!strcmp(this_char, "irixsgid")) {
cmn_err(CE_WARN,
"XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
@@ -430,12 +408,6 @@ xfs_parseargs(
return EINVAL;
}
- if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
- printk("XFS: %s option needs the mount point option as well\n",
- MNTOPT_DMAPI);
- return EINVAL;
- }
-
if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
cmn_err(CE_WARN,
"XFS: sunit and swidth must be specified together");
@@ -449,18 +421,6 @@ xfs_parseargs(
return EINVAL;
}
- /*
- * Applications using DMI filesystems often expect the
- * inode generation number to be monotonically increasing.
- * If we delete inode chunks we break this assumption, so
- * keep unused inode chunks on disk for DMI filesystems
- * until we come up with a better solution.
- * Note that if "ikeep" or "noikeep" mount options are
- * supplied, then they are honored.
- */
- if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
- mp->m_flags |= XFS_MOUNT_IKEEP;
-
done:
if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
/*
@@ -539,10 +499,8 @@ xfs_showargs(
{ XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC },
{ XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
{ XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
- { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
{ XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
{ XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
- { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI },
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
{ 0, NULL }
@@ -947,7 +905,7 @@ xfs_fs_destroy_inode(
{
struct xfs_inode *ip = XFS_I(inode);
- xfs_itrace_entry(ip);
+ trace_xfs_destroy_inode(ip);
XFS_STATS_INC(vn_reclaim);
@@ -1063,10 +1021,8 @@ xfs_log_inode(
* an inode in another recent transaction. So we play it safe and
* fire off the transaction anyway.
*/
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, ip);
+ xfs_trans_ijoin(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
@@ -1082,27 +1038,18 @@ xfs_fs_write_inode(
struct xfs_mount *mp = ip->i_mount;
int error = EAGAIN;
- xfs_itrace_entry(ip);
+ trace_xfs_write_inode(ip);
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
if (wbc->sync_mode == WB_SYNC_ALL) {
/*
- * Make sure the inode has hit stable storage. By using the
- * log and the fsync transactions we reduce the IOs we have
- * to do here from two (log and inode) to just the log.
- *
- * Note: We still need to do a delwri write of the inode after
- * this to flush it to the backing buffer so that bulkstat
- * works properly if this is the first time the inode has been
- * written. Because we hold the ilock atomically over the
- * transaction commit and the inode flush we are guaranteed
- * that the inode is not pinned when it returns. If the flush
- * lock is already held, then the inode has already been
- * flushed once and we don't need to flush it again. Hence
- * the code will only flush the inode if it isn't already
- * being flushed.
+ * Make sure the inode has made it it into the log. Instead
+ * of forcing it all the way to stable storage using a
+ * synchronous transaction we let the log force inside the
+ * ->sync_fs call do that for thus, which reduces the number
+ * of synchronous log foces dramatically.
*/
xfs_ioend_wait(ip);
xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1116,27 +1063,29 @@ xfs_fs_write_inode(
* We make this non-blocking if the inode is contended, return
* EAGAIN to indicate to the caller that they did not succeed.
* This prevents the flush path from blocking on inodes inside
- * another operation right now, they get caught later by xfs_sync.
+ * another operation right now, they get caught later by
+ * xfs_sync.
*/
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
goto out;
- }
- if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
- goto out_unlock;
+ if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+ goto out_unlock;
- /*
- * Now we have the flush lock and the inode is not pinned, we can check
- * if the inode is really clean as we know that there are no pending
- * transaction completions, it is not waiting on the delayed write
- * queue and there is no IO in progress.
- */
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- error = 0;
- goto out_unlock;
+ /*
+ * Now we have the flush lock and the inode is not pinned, we
+ * can check if the inode is really clean as we know that
+ * there are no pending transaction completions, it is not
+ * waiting on the delayed write queue and there is no IO in
+ * progress.
+ */
+ if (xfs_inode_clean(ip)) {
+ xfs_ifunlock(ip);
+ error = 0;
+ goto out_unlock;
+ }
+ error = xfs_iflush(ip, 0);
}
- error = xfs_iflush(ip, 0);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -1156,7 +1105,8 @@ xfs_fs_clear_inode(
{
xfs_inode_t *ip = XFS_I(inode);
- xfs_itrace_entry(ip);
+ trace_xfs_clear_inode(ip);
+
XFS_STATS_INC(vn_rele);
XFS_STATS_INC(vn_remove);
XFS_STATS_DEC(vn_active);
@@ -1193,22 +1143,13 @@ xfs_fs_put_super(
{
struct xfs_mount *mp = XFS_M(sb);
+ /*
+ * Unregister the memory shrinker before we tear down the mount
+ * structure so we don't have memory reclaim racing with us here.
+ */
+ xfs_inode_shrinker_unregister(mp);
xfs_syncd_stop(mp);
- if (!(sb->s_flags & MS_RDONLY)) {
- /*
- * XXX(hch): this should be SYNC_WAIT.
- *
- * Or more likely not needed at all because the VFS is already
- * calling ->sync_fs after shutting down all filestem
- * operations and just before calling ->put_super.
- */
- xfs_sync_data(mp, 0);
- xfs_sync_attr(mp, 0);
- }
-
- XFS_SEND_PREUNMOUNT(mp);
-
/*
* Blow away any referenced inode in the filestreams cache.
* This can and will cause log traffic as inodes go inactive
@@ -1218,14 +1159,10 @@ xfs_fs_put_super(
XFS_bflush(mp->m_ddev_targp);
- XFS_SEND_UNMOUNT(mp);
-
xfs_unmountfs(mp);
xfs_freesb(mp);
- xfs_inode_shrinker_unregister(mp);
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- xfs_dmops_put(mp);
xfs_free_fsname(mp);
kfree(mp);
}
@@ -1543,7 +1480,6 @@ xfs_fs_fill_super(
struct inode *root;
struct xfs_mount *mp = NULL;
int flags = 0, error = ENOMEM;
- char *mtpt = NULL;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
@@ -1559,7 +1495,7 @@ xfs_fs_fill_super(
mp->m_super = sb;
sb->s_fs_info = mp;
- error = xfs_parseargs(mp, (char *)data, &mtpt);
+ error = xfs_parseargs(mp, (char *)data);
if (error)
goto out_free_fsname;
@@ -1571,16 +1507,12 @@ xfs_fs_fill_super(
#endif
sb->s_op = &xfs_super_operations;
- error = xfs_dmops_get(mp);
- if (error)
- goto out_free_fsname;
-
if (silent)
flags |= XFS_MFSI_QUIET;
error = xfs_open_devices(mp);
if (error)
- goto out_put_dmops;
+ goto out_free_fsname;
if (xfs_icsb_init_counters(mp))
mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1608,8 +1540,6 @@ xfs_fs_fill_super(
if (error)
goto out_filestream_unmount;
- XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
-
sb->s_magic = XFS_SB_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1638,7 +1568,6 @@ xfs_fs_fill_super(
xfs_inode_shrinker_register(mp);
- kfree(mtpt);
return 0;
out_filestream_unmount:
@@ -1648,11 +1577,8 @@ xfs_fs_fill_super(
out_destroy_counters:
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- out_put_dmops:
- xfs_dmops_put(mp);
out_free_fsname:
xfs_free_fsname(mp);
- kfree(mtpt);
kfree(mp);
out:
return -error;
@@ -1759,6 +1685,12 @@ xfs_init_zones(void)
if (!xfs_trans_zone)
goto out_destroy_ifork_zone;
+ xfs_log_item_desc_zone =
+ kmem_zone_init(sizeof(struct xfs_log_item_desc),
+ "xfs_log_item_desc");
+ if (!xfs_log_item_desc_zone)
+ goto out_destroy_trans_zone;
+
/*
* The size of the zone allocated buf log item is the maximum
* size possible under XFS. This wastes a little bit of memory,
@@ -1768,7 +1700,7 @@ xfs_init_zones(void)
(((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
NBWORD) * sizeof(int))), "xfs_buf_item");
if (!xfs_buf_item_zone)
- goto out_destroy_trans_zone;
+ goto out_destroy_log_item_desc_zone;
xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
((XFS_EFD_MAX_FAST_EXTENTS - 1) *
@@ -1805,6 +1737,8 @@ xfs_init_zones(void)
kmem_zone_destroy(xfs_efd_zone);
out_destroy_buf_item_zone:
kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_log_item_desc_zone:
+ kmem_zone_destroy(xfs_log_item_desc_zone);
out_destroy_trans_zone:
kmem_zone_destroy(xfs_trans_zone);
out_destroy_ifork_zone:
@@ -1835,6 +1769,7 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_efi_zone);
kmem_zone_destroy(xfs_efd_zone);
kmem_zone_destroy(xfs_buf_item_zone);
+ kmem_zone_destroy(xfs_log_item_desc_zone);
kmem_zone_destroy(xfs_trans_zone);
kmem_zone_destroy(xfs_ifork_zone);
kmem_zone_destroy(xfs_dabuf_zone);
@@ -1883,7 +1818,6 @@ init_xfs_fs(void)
goto out_cleanup_procfs;
vfs_initquota();
- xfs_inode_shrinker_init();
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1911,7 +1845,6 @@ exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
- xfs_inode_shrinker_destroy();
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 519618e9279..1ef4a4d2d99 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,12 +56,6 @@ extern void xfs_qm_exit(void);
# define XFS_BIGFS_STRING
#endif
-#ifdef CONFIG_XFS_DMAPI
-# define XFS_DMAPI_STRING "dmapi support, "
-#else
-# define XFS_DMAPI_STRING
-#endif
-
#ifdef DEBUG
# define XFS_DBG_STRING "debug"
#else
@@ -72,7 +66,6 @@ extern void xfs_qm_exit(void);
XFS_SECURITY_STRING \
XFS_REALTIME_STRING \
XFS_BIGFS_STRING \
- XFS_DMAPI_STRING \
XFS_DBG_STRING /* DBG must be last */
struct xfs_inode;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ef7f0218bcc..dfcbd98d159 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -24,25 +24,14 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_inode.h"
#include "xfs_dinode.h"
#include "xfs_error.h"
-#include "xfs_mru_cache.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
-#include "xfs_utils.h"
-#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
-#include "xfs_rw.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
@@ -144,6 +133,41 @@ restart:
return last_error;
}
+/*
+ * Select the next per-ag structure to iterate during the walk. The reclaim
+ * walk is optimised only to walk AGs with reclaimable inodes in them.
+ */
+static struct xfs_perag *
+xfs_inode_ag_iter_next_pag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t *first,
+ int tag)
+{
+ struct xfs_perag *pag = NULL;
+
+ if (tag == XFS_ICI_RECLAIM_TAG) {
+ int found;
+ int ref;
+
+ spin_lock(&mp->m_perag_lock);
+ found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+ (void **)&pag, *first, 1, tag);
+ if (found <= 0) {
+ spin_unlock(&mp->m_perag_lock);
+ return NULL;
+ }
+ *first = pag->pag_agno + 1;
+ /* open coded pag reference increment */
+ ref = atomic_inc_return(&pag->pag_ref);
+ spin_unlock(&mp->m_perag_lock);
+ trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
+ } else {
+ pag = xfs_perag_get(mp, *first);
+ (*first)++;
+ }
+ return pag;
+}
+
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
@@ -154,16 +178,15 @@ xfs_inode_ag_iterator(
int exclusive,
int *nr_to_scan)
{
+ struct xfs_perag *pag;
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
int nr;
nr = nr_to_scan ? *nr_to_scan : INT_MAX;
- for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
- struct xfs_perag *pag;
-
- pag = xfs_perag_get(mp, ag);
+ ag = 0;
+ while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
exclusive, &nr);
xfs_perag_put(pag);
@@ -285,7 +308,7 @@ xfs_sync_inode_attr(
/*
* Write out pagecache data for the whole filesystem.
*/
-int
+STATIC int
xfs_sync_data(
struct xfs_mount *mp,
int flags)
@@ -306,7 +329,7 @@ xfs_sync_data(
/*
* Write out inode metadata (attributes) for the whole filesystem.
*/
-int
+STATIC int
xfs_sync_attr(
struct xfs_mount *mp,
int flags)
@@ -339,8 +362,7 @@ xfs_commit_dummy_trans(
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, ip);
+ xfs_trans_ijoin(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_trans_commit(tp, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -640,6 +662,17 @@ __xfs_inode_set_reclaim_tag(
radix_tree_tag_set(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
+
+ if (!pag->pag_ici_reclaimable) {
+ /* propagate the reclaim tag up into the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
pag->pag_ici_reclaimable++;
}
@@ -674,6 +707,16 @@ __xfs_inode_clear_reclaim_tag(
radix_tree_tag_clear(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable--;
+ if (!pag->pag_ici_reclaimable) {
+ /* clear the reclaim tag from the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
}
/*
@@ -812,7 +855,36 @@ out:
reclaim:
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_ireclaim(ip);
+
+ XFS_STATS_INC(xs_ig_reclaims);
+ /*
+ * Remove the inode from the per-AG radix tree.
+ *
+ * Because radix_tree_delete won't complain even if the item was never
+ * added to the tree assert that it's been there before to catch
+ * problems with the inode life time early on.
+ */
+ write_lock(&pag->pag_ici_lock);
+ if (!radix_tree_delete(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+ ASSERT(0);
+ write_unlock(&pag->pag_ici_lock);
+
+ /*
+ * Here we do an (almost) spurious inode lock in order to coordinate
+ * with inode cache radix tree lookups. This is because the lookup
+ * can reference the inodes in the cache without taking references.
+ *
+ * We make that OK here by ensuring that we wait until the inode is
+ * unlocked after the lookup before we go ahead and free it. We get
+ * both the ilock and the iolock because the code may need to drop the
+ * ilock one but will still hold the iolock.
+ */
+ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_qm_dqdetach(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+ xfs_inode_free(ip);
return error;
}
@@ -828,83 +900,52 @@ xfs_reclaim_inodes(
/*
* Shrinker infrastructure.
- *
- * This is all far more complex than it needs to be. It adds a global list of
- * mounts because the shrinkers can only call a global context. We need to make
- * the shrinkers pass a context to avoid the need for global state.
*/
-static LIST_HEAD(xfs_mount_list);
-static struct rw_semaphore xfs_mount_list_lock;
-
static int
xfs_reclaim_inode_shrink(
+ struct shrinker *shrink,
int nr_to_scan,
gfp_t gfp_mask)
{
struct xfs_mount *mp;
struct xfs_perag *pag;
xfs_agnumber_t ag;
- int reclaimable = 0;
+ int reclaimable;
+ mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
if (nr_to_scan) {
if (!(gfp_mask & __GFP_FS))
return -1;
- down_read(&xfs_mount_list_lock);
- list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
- xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+ xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
- if (nr_to_scan <= 0)
- break;
- }
- up_read(&xfs_mount_list_lock);
- }
+ /* if we don't exhaust the scan, don't bother coming back */
+ if (nr_to_scan > 0)
+ return -1;
+ }
- down_read(&xfs_mount_list_lock);
- list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
- for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
- pag = xfs_perag_get(mp, ag);
- reclaimable += pag->pag_ici_reclaimable;
- xfs_perag_put(pag);
- }
+ reclaimable = 0;
+ ag = 0;
+ while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
+ XFS_ICI_RECLAIM_TAG))) {
+ reclaimable += pag->pag_ici_reclaimable;
+ xfs_perag_put(pag);
}
- up_read(&xfs_mount_list_lock);
return reclaimable;
}
-static struct shrinker xfs_inode_shrinker = {
- .shrink = xfs_reclaim_inode_shrink,
- .seeks = DEFAULT_SEEKS,
-};
-
-void __init
-xfs_inode_shrinker_init(void)
-{
- init_rwsem(&xfs_mount_list_lock);
- register_shrinker(&xfs_inode_shrinker);
-}
-
-void
-xfs_inode_shrinker_destroy(void)
-{
- ASSERT(list_empty(&xfs_mount_list));
- unregister_shrinker(&xfs_inode_shrinker);
-}
-
void
xfs_inode_shrinker_register(
struct xfs_mount *mp)
{
- down_write(&xfs_mount_list_lock);
- list_add_tail(&mp->m_mplist, &xfs_mount_list);
- up_write(&xfs_mount_list_lock);
+ mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
+ mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
+ register_shrinker(&mp->m_inode_shrink);
}
void
xfs_inode_shrinker_unregister(
struct xfs_mount *mp)
{
- down_write(&xfs_mount_list_lock);
- list_del(&mp->m_mplist);
- up_write(&xfs_mount_list_lock);
+ unregister_shrinker(&mp->m_inode_shrink);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index cdcbaaca988..fe78726196f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -35,9 +35,6 @@ typedef struct xfs_sync_work {
int xfs_syncd_init(struct xfs_mount *mp);
void xfs_syncd_stop(struct xfs_mount *mp);
-int xfs_sync_attr(struct xfs_mount *mp, int flags);
-int xfs_sync_data(struct xfs_mount *mp, int flags);
-
int xfs_quiesce_data(struct xfs_mount *mp);
void xfs_quiesce_attr(struct xfs_mount *mp);
@@ -55,8 +52,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags, int tag, int write_lock, int *nr_to_scan);
-void xfs_inode_shrinker_init(void);
-void xfs_inode_shrinker_destroy(void);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index d12be8470cb..88d25d4aa56 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -24,17 +24,13 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 73d5aa11738..c657cdca2cd 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name, \
unsigned long caller_ip), \
TP_ARGS(mp, agno, refcount, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
@@ -314,8 +317,6 @@ DEFINE_BUF_EVENT(xfs_buf_init);
DEFINE_BUF_EVENT(xfs_buf_free);
DEFINE_BUF_EVENT(xfs_buf_hold);
DEFINE_BUF_EVENT(xfs_buf_rele);
-DEFINE_BUF_EVENT(xfs_buf_pin);
-DEFINE_BUF_EVENT(xfs_buf_unpin);
DEFINE_BUF_EVENT(xfs_buf_iodone);
DEFINE_BUF_EVENT(xfs_buf_iorequest);
DEFINE_BUF_EVENT(xfs_buf_bawrite);
@@ -538,7 +539,7 @@ DEFINE_LOCK_EVENT(xfs_ilock_nowait);
DEFINE_LOCK_EVENT(xfs_ilock_demote);
DEFINE_LOCK_EVENT(xfs_iunlock);
-DECLARE_EVENT_CLASS(xfs_iget_class,
+DECLARE_EVENT_CLASS(xfs_inode_class,
TP_PROTO(struct xfs_inode *ip),
TP_ARGS(ip),
TP_STRUCT__entry(
@@ -554,16 +555,38 @@ DECLARE_EVENT_CLASS(xfs_iget_class,
__entry->ino)
)
-#define DEFINE_IGET_EVENT(name) \
-DEFINE_EVENT(xfs_iget_class, name, \
+#define DEFINE_INODE_EVENT(name) \
+DEFINE_EVENT(xfs_inode_class, name, \
TP_PROTO(struct xfs_inode *ip), \
TP_ARGS(ip))
-DEFINE_IGET_EVENT(xfs_iget_skip);
-DEFINE_IGET_EVENT(xfs_iget_reclaim);
-DEFINE_IGET_EVENT(xfs_iget_found);
-DEFINE_IGET_EVENT(xfs_iget_alloc);
-
-DECLARE_EVENT_CLASS(xfs_inode_class,
+DEFINE_INODE_EVENT(xfs_iget_skip);
+DEFINE_INODE_EVENT(xfs_iget_reclaim);
+DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
+DEFINE_INODE_EVENT(xfs_iget_hit);
+DEFINE_INODE_EVENT(xfs_iget_miss);
+
+DEFINE_INODE_EVENT(xfs_getattr);
+DEFINE_INODE_EVENT(xfs_setattr);
+DEFINE_INODE_EVENT(xfs_readlink);
+DEFINE_INODE_EVENT(xfs_alloc_file_space);
+DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_readdir);
+#ifdef CONFIG_XFS_POSIX_ACL
+DEFINE_INODE_EVENT(xfs_check_acl);
+#endif
+DEFINE_INODE_EVENT(xfs_vm_bmap);
+DEFINE_INODE_EVENT(xfs_file_ioctl);
+DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
+DEFINE_INODE_EVENT(xfs_ioctl_setattr);
+DEFINE_INODE_EVENT(xfs_file_fsync);
+DEFINE_INODE_EVENT(xfs_destroy_inode);
+DEFINE_INODE_EVENT(xfs_write_inode);
+DEFINE_INODE_EVENT(xfs_clear_inode);
+
+DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
+DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
+
+DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
TP_ARGS(ip, caller_ip),
TP_STRUCT__entry(
@@ -588,20 +611,71 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
(char *)__entry->caller_ip)
)
-#define DEFINE_INODE_EVENT(name) \
-DEFINE_EVENT(xfs_inode_class, name, \
+#define DEFINE_IREF_EVENT(name) \
+DEFINE_EVENT(xfs_iref_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
TP_ARGS(ip, caller_ip))
-DEFINE_INODE_EVENT(xfs_ihold);
-DEFINE_INODE_EVENT(xfs_irele);
-DEFINE_INODE_EVENT(xfs_inode_pin);
-DEFINE_INODE_EVENT(xfs_inode_unpin);
-DEFINE_INODE_EVENT(xfs_inode_unpin_nowait);
+DEFINE_IREF_EVENT(xfs_ihold);
+DEFINE_IREF_EVENT(xfs_irele);
+DEFINE_IREF_EVENT(xfs_inode_pin);
+DEFINE_IREF_EVENT(xfs_inode_unpin);
+DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+
+DECLARE_EVENT_CLASS(xfs_namespace_class,
+ TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
+ TP_ARGS(dp, name),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dp_ino)
+ __dynamic_array(char, name, name->len)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(dp)->i_sb->s_dev;
+ __entry->dp_ino = dp->i_ino;
+ memcpy(__get_str(name), name->name, name->len);
+ ),
+ TP_printk("dev %d:%d dp ino 0x%llx name %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dp_ino,
+ __get_str(name))
+)
-/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
-DEFINE_INODE_EVENT(xfs_inode);
-#define xfs_itrace_entry(ip) \
- trace_xfs_inode(ip, _THIS_IP_)
+#define DEFINE_NAMESPACE_EVENT(name) \
+DEFINE_EVENT(xfs_namespace_class, name, \
+ TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
+ TP_ARGS(dp, name))
+DEFINE_NAMESPACE_EVENT(xfs_remove);
+DEFINE_NAMESPACE_EVENT(xfs_link);
+DEFINE_NAMESPACE_EVENT(xfs_lookup);
+DEFINE_NAMESPACE_EVENT(xfs_create);
+DEFINE_NAMESPACE_EVENT(xfs_symlink);
+
+TRACE_EVENT(xfs_rename,
+ TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
+ struct xfs_name *src_name, struct xfs_name *target_name),
+ TP_ARGS(src_dp, target_dp, src_name, target_name),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, src_dp_ino)
+ __field(xfs_ino_t, target_dp_ino)
+ __dynamic_array(char, src_name, src_name->len)
+ __dynamic_array(char, target_name, target_name->len)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
+ __entry->src_dp_ino = src_dp->i_ino;
+ __entry->target_dp_ino = target_dp->i_ino;
+ memcpy(__get_str(src_name), src_name->name, src_name->len);
+ memcpy(__get_str(target_name), target_name->name, target_name->len);
+ ),
+ TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
+ " src name %s target name %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->src_dp_ino,
+ __entry->target_dp_ino,
+ __get_str(src_name),
+ __get_str(target_name))
+)
DECLARE_EVENT_CLASS(xfs_dquot_class,
TP_PROTO(struct xfs_dquot *dqp),
@@ -681,9 +755,6 @@ DEFINE_DQUOT_EVENT(xfs_dqrele);
DEFINE_DQUOT_EVENT(xfs_dqflush);
DEFINE_DQUOT_EVENT(xfs_dqflush_force);
DEFINE_DQUOT_EVENT(xfs_dqflush_done);
-/* not really iget events, but we re-use the format */
-DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
-DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
DECLARE_EVENT_CLASS(xfs_loggrant_class,
TP_PROTO(struct log *log, struct xlog_ticket *tic),
@@ -831,33 +902,29 @@ DECLARE_EVENT_CLASS(xfs_page_class,
__field(loff_t, size)
__field(unsigned long, offset)
__field(int, delalloc)
- __field(int, unmapped)
__field(int, unwritten)
),
TP_fast_assign(
- int delalloc = -1, unmapped = -1, unwritten = -1;
+ int delalloc = -1, unwritten = -1;
if (page_has_buffers(page))
- xfs_count_page_state(page, &delalloc,
- &unmapped, &unwritten);
+ xfs_count_page_state(page, &delalloc, &unwritten);
__entry->dev = inode->i_sb->s_dev;
__entry->ino = XFS_I(inode)->i_ino;
__entry->pgoff = page_offset(page);
__entry->size = i_size_read(inode);
__entry->offset = off;
__entry->delalloc = delalloc;
- __entry->unmapped = unmapped;
__entry->unwritten = unwritten;
),
TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
- "delalloc %d unmapped %d unwritten %d",
+ "delalloc %d unwritten %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->pgoff,
__entry->size,
__entry->offset,
__entry->delalloc,
- __entry->unmapped,
__entry->unwritten)
)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 585e7633dfc..e1a2f6800e0 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -23,25 +23,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -64,8 +54,6 @@
flush lock - ditto.
*/
-STATIC void xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
-
#ifdef DEBUG
xfs_buftarg_t *xfs_dqerror_target;
int xfs_do_dqerror;
@@ -390,21 +378,14 @@ xfs_qm_dqalloc(
return (ESRCH);
}
- /*
- * xfs_trans_commit normally decrements the vnode ref count
- * when it unlocks the inode. Since we want to keep the quota
- * inode around, we bump the vnode ref count now.
- */
- IHOLD(quotip);
-
- xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
nmaps = 1;
if ((error = xfs_bmapi(tp, quotip,
offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
&firstblock,
XFS_QM_DQALLOC_SPACE_RES(mp),
- &map, &nmaps, &flist, NULL))) {
+ &map, &nmaps, &flist))) {
goto error0;
}
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -520,7 +501,7 @@ xfs_qm_dqtobp(
error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
XFS_DQUOT_CLUSTER_SIZE_FSB,
XFS_BMAPI_METADATA,
- NULL, 0, &map, &nmaps, NULL, NULL);
+ NULL, 0, &map, &nmaps, NULL);
xfs_iunlock(quotip, XFS_ILOCK_SHARED);
if (error)
@@ -1141,6 +1122,46 @@ xfs_qm_dqrele(
xfs_qm_dqput(dqp);
}
+/*
+ * This is the dquot flushing I/O completion routine. It is called
+ * from interrupt level when the buffer containing the dquot is
+ * flushed to disk. It is responsible for removing the dquot logitem
+ * from the AIL if it has not been re-logged, and unlocking the dquot's
+ * flush lock. This behavior is very similar to that of inodes..
+ */
+STATIC void
+xfs_qm_dqflush_done(
+ struct xfs_buf *bp,
+ struct xfs_log_item *lip)
+{
+ xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
+ xfs_dquot_t *dqp = qip->qli_dquot;
+ struct xfs_ail *ailp = lip->li_ailp;
+
+ /*
+ * We only want to pull the item from the AIL if its
+ * location in the log has not changed since we started the flush.
+ * Thus, we only bother if the dquot's lsn has
+ * not changed. First we check the lsn outside the lock
+ * since it's cheaper, and then we recheck while
+ * holding the lock before removing the dquot from the AIL.
+ */
+ if ((lip->li_flags & XFS_LI_IN_AIL) &&
+ lip->li_lsn == qip->qli_flush_lsn) {
+
+ /* xfs_trans_ail_delete() drops the AIL lock. */
+ spin_lock(&ailp->xa_lock);
+ if (lip->li_lsn == qip->qli_flush_lsn)
+ xfs_trans_ail_delete(ailp, lip);
+ else
+ spin_unlock(&ailp->xa_lock);
+ }
+
+ /*
+ * Release the dq's flush lock since we're done with it.
+ */
+ xfs_dqfunlock(dqp);
+}
/*
* Write a modified dquot to disk.
@@ -1222,8 +1243,9 @@ xfs_qm_dqflush(
* Attach an iodone routine so that we can remove this dquot from the
* AIL and release the flush lock once the dquot is synced to disk.
*/
- xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *))
- xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item));
+ xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
+ &dqp->q_logitem.qli_item);
+
/*
* If the buffer is pinned then push on the log so we won't
* get stuck waiting in the write for too long.
@@ -1247,50 +1269,6 @@ xfs_qm_dqflush(
}
-/*
- * This is the dquot flushing I/O completion routine. It is called
- * from interrupt level when the buffer containing the dquot is
- * flushed to disk. It is responsible for removing the dquot logitem
- * from the AIL if it has not been re-logged, and unlocking the dquot's
- * flush lock. This behavior is very similar to that of inodes..
- */
-/*ARGSUSED*/
-STATIC void
-xfs_qm_dqflush_done(
- xfs_buf_t *bp,
- xfs_dq_logitem_t *qip)
-{
- xfs_dquot_t *dqp;
- struct xfs_ail *ailp;
-
- dqp = qip->qli_dquot;
- ailp = qip->qli_item.li_ailp;
-
- /*
- * We only want to pull the item from the AIL if its
- * location in the log has not changed since we started the flush.
- * Thus, we only bother if the dquot's lsn has
- * not changed. First we check the lsn outside the lock
- * since it's cheaper, and then we recheck while
- * holding the lock before removing the dquot from the AIL.
- */
- if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
- qip->qli_item.li_lsn == qip->qli_flush_lsn) {
-
- /* xfs_trans_ail_delete() drops the AIL lock. */
- spin_lock(&ailp->xa_lock);
- if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
- xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
- else
- spin_unlock(&ailp->xa_lock);
- }
-
- /*
- * Release the dq's flush lock since we're done with it.
- */
- xfs_dqfunlock(dqp);
-}
-
int
xfs_qm_dqlock_nowait(
xfs_dquot_t *dqp)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 8d89a24ae32..2a1f3dc10a0 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -23,42 +23,36 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
+static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_dq_logitem, qli_item);
+}
+
/*
* returns the number of iovecs needed to log the given dquot item.
*/
-/* ARGSUSED */
STATIC uint
xfs_qm_dquot_logitem_size(
- xfs_dq_logitem_t *logitem)
+ struct xfs_log_item *lip)
{
/*
* we need only two iovecs, one for the format, one for the real thing
*/
- return (2);
+ return 2;
}
/*
@@ -66,22 +60,21 @@ xfs_qm_dquot_logitem_size(
*/
STATIC void
xfs_qm_dquot_logitem_format(
- xfs_dq_logitem_t *logitem,
- xfs_log_iovec_t *logvec)
+ struct xfs_log_item *lip,
+ struct xfs_log_iovec *logvec)
{
- ASSERT(logitem);
- ASSERT(logitem->qli_dquot);
+ struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
- logvec->i_addr = (xfs_caddr_t)&logitem->qli_format;
+ logvec->i_addr = &qlip->qli_format;
logvec->i_len = sizeof(xfs_dq_logformat_t);
logvec->i_type = XLOG_REG_TYPE_QFORMAT;
logvec++;
- logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core;
+ logvec->i_addr = &qlip->qli_dquot->q_core;
logvec->i_len = sizeof(xfs_disk_dquot_t);
logvec->i_type = XLOG_REG_TYPE_DQUOT;
- ASSERT(2 == logitem->qli_item.li_desc->lid_size);
- logitem->qli_format.qlf_size = 2;
+ ASSERT(2 == lip->li_desc->lid_size);
+ qlip->qli_format.qlf_size = 2;
}
@@ -90,9 +83,9 @@ xfs_qm_dquot_logitem_format(
*/
STATIC void
xfs_qm_dquot_logitem_pin(
- xfs_dq_logitem_t *logitem)
+ struct xfs_log_item *lip)
{
- xfs_dquot_t *dqp = logitem->qli_dquot;
+ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
atomic_inc(&dqp->q_pincount);
@@ -104,27 +97,18 @@ xfs_qm_dquot_logitem_pin(
* dquot must have been previously pinned with a call to
* xfs_qm_dquot_logitem_pin().
*/
-/* ARGSUSED */
STATIC void
xfs_qm_dquot_logitem_unpin(
- xfs_dq_logitem_t *logitem)
+ struct xfs_log_item *lip,
+ int remove)
{
- xfs_dquot_t *dqp = logitem->qli_dquot;
+ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
ASSERT(atomic_read(&dqp->q_pincount) > 0);
if (atomic_dec_and_test(&dqp->q_pincount))
wake_up(&dqp->q_pinwait);
}
-/* ARGSUSED */
-STATIC void
-xfs_qm_dquot_logitem_unpin_remove(
- xfs_dq_logitem_t *logitem,
- xfs_trans_t *tp)
-{
- xfs_qm_dquot_logitem_unpin(logitem);
-}
-
/*
* Given the logitem, this writes the corresponding dquot entry to disk
* asynchronously. This is called with the dquot entry securely locked;
@@ -133,12 +117,10 @@ xfs_qm_dquot_logitem_unpin_remove(
*/
STATIC void
xfs_qm_dquot_logitem_push(
- xfs_dq_logitem_t *logitem)
+ struct xfs_log_item *lip)
{
- xfs_dquot_t *dqp;
- int error;
-
- dqp = logitem->qli_dquot;
+ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
+ int error;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(!completion_done(&dqp->q_flush));
@@ -160,27 +142,25 @@ xfs_qm_dquot_logitem_push(
xfs_dqunlock(dqp);
}
-/*ARGSUSED*/
STATIC xfs_lsn_t
xfs_qm_dquot_logitem_committed(
- xfs_dq_logitem_t *l,
+ struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
/*
* We always re-log the entire dquot when it becomes dirty,
* so, the latest copy _is_ the only one that matters.
*/
- return (lsn);
+ return lsn;
}
-
/*
* This is called to wait for the given dquot to be unpinned.
* Most of these pin/unpin routines are plagiarized from inode code.
*/
void
xfs_qm_dqunpin_wait(
- xfs_dquot_t *dqp)
+ struct xfs_dquot *dqp)
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
if (atomic_read(&dqp->q_pincount) == 0)
@@ -206,13 +186,12 @@ xfs_qm_dqunpin_wait(
*/
STATIC void
xfs_qm_dquot_logitem_pushbuf(
- xfs_dq_logitem_t *qip)
+ struct xfs_log_item *lip)
{
- xfs_dquot_t *dqp;
- xfs_mount_t *mp;
- xfs_buf_t *bp;
+ struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
+ struct xfs_dquot *dqp = qlip->qli_dquot;
+ struct xfs_buf *bp;
- dqp = qip->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
/*
@@ -220,22 +199,20 @@ xfs_qm_dquot_logitem_pushbuf(
* inode flush completed and the inode was taken off the AIL.
* So, just get out.
*/
- if (completion_done(&dqp->q_flush) ||
- ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
+ if (completion_done(&dqp->q_flush) ||
+ !(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_dqunlock(dqp);
return;
}
- mp = dqp->q_mount;
- bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
- mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+
+ bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
+ dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
xfs_dqunlock(dqp);
if (!bp)
return;
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
xfs_buf_relse(bp);
- return;
-
}
/*
@@ -250,15 +227,14 @@ xfs_qm_dquot_logitem_pushbuf(
*/
STATIC uint
xfs_qm_dquot_logitem_trylock(
- xfs_dq_logitem_t *qip)
+ struct xfs_log_item *lip)
{
- xfs_dquot_t *dqp;
+ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- dqp = qip->qli_dquot;
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
- if (! xfs_qm_dqlock_nowait(dqp))
+ if (!xfs_qm_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
if (!xfs_dqflock_nowait(dqp)) {
@@ -269,11 +245,10 @@ xfs_qm_dquot_logitem_trylock(
return XFS_ITEM_PUSHBUF;
}
- ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
+ ASSERT(lip->li_flags & XFS_LI_IN_AIL);
return XFS_ITEM_SUCCESS;
}
-
/*
* Unlock the dquot associated with the log item.
* Clear the fields of the dquot and dquot log item that
@@ -282,12 +257,10 @@ xfs_qm_dquot_logitem_trylock(
*/
STATIC void
xfs_qm_dquot_logitem_unlock(
- xfs_dq_logitem_t *ql)
+ struct xfs_log_item *lip)
{
- xfs_dquot_t *dqp;
+ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- ASSERT(ql != NULL);
- dqp = ql->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
/*
@@ -304,43 +277,32 @@ xfs_qm_dquot_logitem_unlock(
xfs_dqunlock(dqp);
}
-
/*
* this needs to stamp an lsn into the dquot, I think.
* rpc's that look at user dquot's would then have to
* push on the dependency recorded in the dquot
*/
-/* ARGSUSED */
STATIC void
xfs_qm_dquot_logitem_committing(
- xfs_dq_logitem_t *l,
+ struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
- return;
}
-
/*
* This is the ops vector for dquots
*/
static struct xfs_item_ops xfs_dquot_item_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_qm_dquot_logitem_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin,
- .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
- xfs_qm_dquot_logitem_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))
- xfs_qm_dquot_logitem_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_dquot_logitem_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_push,
- .iop_pushbuf = (void(*)(xfs_log_item_t*))
- xfs_qm_dquot_logitem_pushbuf,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_dquot_logitem_committing
+ .iop_size = xfs_qm_dquot_logitem_size,
+ .iop_format = xfs_qm_dquot_logitem_format,
+ .iop_pin = xfs_qm_dquot_logitem_pin,
+ .iop_unpin = xfs_qm_dquot_logitem_unpin,
+ .iop_trylock = xfs_qm_dquot_logitem_trylock,
+ .iop_unlock = xfs_qm_dquot_logitem_unlock,
+ .iop_committed = xfs_qm_dquot_logitem_committed,
+ .iop_push = xfs_qm_dquot_logitem_push,
+ .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf,
+ .iop_committing = xfs_qm_dquot_logitem_committing
};
/*
@@ -350,10 +312,9 @@ static struct xfs_item_ops xfs_dquot_item_ops = {
*/
void
xfs_qm_dquot_logitem_init(
- struct xfs_dquot *dqp)
+ struct xfs_dquot *dqp)
{
- xfs_dq_logitem_t *lp;
- lp = &dqp->q_logitem;
+ struct xfs_dq_logitem *lp = &dqp->q_logitem;
xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
&xfs_dquot_item_ops);
@@ -374,16 +335,22 @@ xfs_qm_dquot_logitem_init(
/*------------------ QUOTAOFF LOG ITEMS -------------------*/
+static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_qoff_logitem, qql_item);
+}
+
+
/*
* This returns the number of iovecs needed to log the given quotaoff item.
* We only need 1 iovec for an quotaoff item. It just logs the
* quotaoff_log_format structure.
*/
-/*ARGSUSED*/
STATIC uint
-xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
+xfs_qm_qoff_logitem_size(
+ struct xfs_log_item *lip)
{
- return (1);
+ return 1;
}
/*
@@ -394,53 +361,46 @@ xfs_qm_qoff_logitem_size(xfs_qoff_logitem_t *qf)
* slots in the quotaoff item have been filled.
*/
STATIC void
-xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf,
- xfs_log_iovec_t *log_vector)
+xfs_qm_qoff_logitem_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_iovec *log_vector)
{
- ASSERT(qf->qql_format.qf_type == XFS_LI_QUOTAOFF);
+ struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
+
+ ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
- log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
+ log_vector->i_addr = &qflip->qql_format;
log_vector->i_len = sizeof(xfs_qoff_logitem_t);
log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
- qf->qql_format.qf_size = 1;
+ qflip->qql_format.qf_size = 1;
}
-
/*
* Pinning has no meaning for an quotaoff item, so just return.
*/
-/*ARGSUSED*/
STATIC void
-xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf)
+xfs_qm_qoff_logitem_pin(
+ struct xfs_log_item *lip)
{
- return;
}
-
/*
* Since pinning has no meaning for an quotaoff item, unpinning does
* not either.
*/
-/*ARGSUSED*/
STATIC void
-xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf)
+xfs_qm_qoff_logitem_unpin(
+ struct xfs_log_item *lip,
+ int remove)
{
- return;
-}
-
-/*ARGSUSED*/
-STATIC void
-xfs_qm_qoff_logitem_unpin_remove(xfs_qoff_logitem_t *qf, xfs_trans_t *tp)
-{
- return;
}
/*
* Quotaoff items have no locking, so just return success.
*/
-/*ARGSUSED*/
STATIC uint
-xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
+xfs_qm_qoff_logitem_trylock(
+ struct xfs_log_item *lip)
{
return XFS_ITEM_LOCKED;
}
@@ -449,53 +409,51 @@ xfs_qm_qoff_logitem_trylock(xfs_qoff_logitem_t *qf)
* Quotaoff items have no locking or pushing, so return failure
* so that the caller doesn't bother with us.
*/
-/*ARGSUSED*/
STATIC void
-xfs_qm_qoff_logitem_unlock(xfs_qoff_logitem_t *qf)
+xfs_qm_qoff_logitem_unlock(
+ struct xfs_log_item *lip)
{
- return;
}
/*
* The quotaoff-start-item is logged only once and cannot be moved in the log,
* so simply return the lsn at which it's been logged.
*/
-/*ARGSUSED*/
STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(xfs_qoff_logitem_t *qf, xfs_lsn_t lsn)
+xfs_qm_qoff_logitem_committed(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
{
- return (lsn);
+ return lsn;
}
/*
* There isn't much you can do to push on an quotaoff item. It is simply
* stuck waiting for the log to be flushed to disk.
*/
-/*ARGSUSED*/
STATIC void
-xfs_qm_qoff_logitem_push(xfs_qoff_logitem_t *qf)
+xfs_qm_qoff_logitem_push(
+ struct xfs_log_item *lip)
{
- return;
}
-/*ARGSUSED*/
STATIC xfs_lsn_t
xfs_qm_qoffend_logitem_committed(
- xfs_qoff_logitem_t *qfe,
- xfs_lsn_t lsn)
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
{
- xfs_qoff_logitem_t *qfs;
- struct xfs_ail *ailp;
+ struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
+ struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
+ struct xfs_ail *ailp = qfs->qql_item.li_ailp;
- qfs = qfe->qql_start_lip;
- ailp = qfs->qql_item.li_ailp;
- spin_lock(&ailp->xa_lock);
/*
* Delete the qoff-start logitem from the AIL.
* xfs_trans_ail_delete() drops the AIL lock.
*/
+ spin_lock(&ailp->xa_lock);
xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
+
kmem_free(qfs);
kmem_free(qfe);
return (xfs_lsn_t)-1;
@@ -515,71 +473,52 @@ xfs_qm_qoffend_logitem_committed(
* (truly makes the quotaoff irrevocable). If we do something else,
* then maybe we don't need two.
*/
-/* ARGSUSED */
-STATIC void
-xfs_qm_qoff_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
-{
- return;
-}
-
-/* ARGSUSED */
STATIC void
-xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
+xfs_qm_qoff_logitem_committing(
+ struct xfs_log_item *lip,
+ xfs_lsn_t commit_lsn)
{
- return;
}
static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_qm_qoff_logitem_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
- .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
- xfs_qm_qoff_logitem_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_qoffend_logitem_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
- .iop_pushbuf = NULL,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_qoffend_logitem_committing
+ .iop_size = xfs_qm_qoff_logitem_size,
+ .iop_format = xfs_qm_qoff_logitem_format,
+ .iop_pin = xfs_qm_qoff_logitem_pin,
+ .iop_unpin = xfs_qm_qoff_logitem_unpin,
+ .iop_trylock = xfs_qm_qoff_logitem_trylock,
+ .iop_unlock = xfs_qm_qoff_logitem_unlock,
+ .iop_committed = xfs_qm_qoffend_logitem_committed,
+ .iop_push = xfs_qm_qoff_logitem_push,
+ .iop_committing = xfs_qm_qoff_logitem_committing
};
/*
* This is the ops vector shared by all quotaoff-start log items.
*/
static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_qm_qoff_logitem_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
- .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
- xfs_qm_qoff_logitem_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_qoff_logitem_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_push,
- .iop_pushbuf = NULL,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_qm_qoff_logitem_committing
+ .iop_size = xfs_qm_qoff_logitem_size,
+ .iop_format = xfs_qm_qoff_logitem_format,
+ .iop_pin = xfs_qm_qoff_logitem_pin,
+ .iop_unpin = xfs_qm_qoff_logitem_unpin,
+ .iop_trylock = xfs_qm_qoff_logitem_trylock,
+ .iop_unlock = xfs_qm_qoff_logitem_unlock,
+ .iop_committed = xfs_qm_qoff_logitem_committed,
+ .iop_push = xfs_qm_qoff_logitem_push,
+ .iop_committing = xfs_qm_qoff_logitem_committing
};
/*
* Allocate and initialize an quotaoff item of the correct quota type(s).
*/
-xfs_qoff_logitem_t *
+struct xfs_qoff_logitem *
xfs_qm_qoff_logitem_init(
- struct xfs_mount *mp,
- xfs_qoff_logitem_t *start,
- uint flags)
+ struct xfs_mount *mp,
+ struct xfs_qoff_logitem *start,
+ uint flags)
{
- xfs_qoff_logitem_t *qf;
+ struct xfs_qoff_logitem *qf;
- qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP);
+ qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
&xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
@@ -587,5 +526,5 @@ xfs_qm_qoff_logitem_init(
qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
qf->qql_format.qf_flags = flags;
qf->qql_start_lip = start;
- return (qf);
+ return qf;
}
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 2d8b7bc792c..9a92407109a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -23,25 +23,18 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -69,7 +62,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int xfs_qm_shake(int, gfp_t);
+STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
static struct shrinker xfs_qm_shaker = {
.shrink = xfs_qm_shake,
@@ -1497,7 +1490,7 @@ xfs_qm_dqiterate(
maxlblkcnt - lblkno,
XFS_BMAPI_METADATA,
NULL,
- 0, map, &nmaps, NULL, NULL);
+ 0, map, &nmaps, NULL);
xfs_iunlock(qip, XFS_ILOCK_SHARED);
if (error)
break;
@@ -1632,10 +1625,7 @@ xfs_qm_dqusage_adjust(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* not used */
int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
int *ubused, /* not used */
- void *dip, /* on-disk inode pointer (not used) */
int *res) /* result code value */
{
xfs_inode_t *ip;
@@ -1660,7 +1650,7 @@ xfs_qm_dqusage_adjust(
* the case in all other instances. It's OK that we do this because
* quotacheck is done only at mount time.
*/
- if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
+ if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
*res = BULKSTAT_RV_NOTHING;
return error;
}
@@ -1672,7 +1662,8 @@ xfs_qm_dqusage_adjust(
* making us disable quotas for the file system.
*/
if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
- xfs_iput(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ IRELE(ip);
*res = BULKSTAT_RV_GIVEUP;
return error;
}
@@ -1685,7 +1676,8 @@ xfs_qm_dqusage_adjust(
* Walk thru the extent list and count the realtime blocks.
*/
if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
- xfs_iput(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ IRELE(ip);
if (udqp)
xfs_qm_dqput(udqp);
if (gdqp)
@@ -1796,12 +1788,13 @@ xfs_qm_quotacheck(
* Iterate thru all the inodes in the file system,
* adjusting the corresponding dquot counters in core.
*/
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_dqusage_adjust, NULL,
- structsz, NULL, BULKSTAT_FG_IGET, &done)))
+ error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_dqusage_adjust,
+ structsz, NULL, &done);
+ if (error)
break;
- } while (! done);
+ } while (!done);
/*
* We've made all the changes that we need to make incore.
@@ -1889,14 +1882,14 @@ xfs_qm_init_quotainos(
mp->m_sb.sb_uquotino != NULLFSINO) {
ASSERT(mp->m_sb.sb_uquotino > 0);
if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0)))
+ 0, 0, &uip)))
return XFS_ERROR(error);
}
if (XFS_IS_OQUOTA_ON(mp) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
ASSERT(mp->m_sb.sb_gquotino > 0);
if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0))) {
+ 0, 0, &gip))) {
if (uip)
IRELE(uip);
return XFS_ERROR(error);
@@ -2119,7 +2112,10 @@ xfs_qm_shake_freelist(
*/
/* ARGSUSED */
STATIC int
-xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
+xfs_qm_shake(
+ struct shrinker *shrink,
+ int nr_to_scan,
+ gfp_t gfp_mask)
{
int ndqused, nfree, n;
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 97b410c1279..bea02d786c5 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -23,25 +23,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
#include "xfs_itable.h"
-#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 3d1fc79532e..8671a0b3264 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -23,25 +23,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
-#include "xfs_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 92b002f1805..d257eb8557c 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -26,25 +26,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
-#include "xfs_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -248,40 +238,74 @@ out_unlock:
return error;
}
+STATIC int
+xfs_qm_scall_trunc_qfile(
+ struct xfs_mount *mp,
+ xfs_ino_t ino)
+{
+ struct xfs_inode *ip;
+ struct xfs_trans *tp;
+ int error;
+
+ if (ino == NULLFSINO)
+ return 0;
+
+ error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
+ error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_ITRUNCATE_LOG_COUNT);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ goto out_put;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip);
+
+ error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1);
+ if (error) {
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+ XFS_TRANS_ABORT);
+ goto out_unlock;
+ }
+
+ xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+out_put:
+ IRELE(ip);
+ return error;
+}
+
int
xfs_qm_scall_trunc_qfiles(
xfs_mount_t *mp,
uint flags)
{
int error = 0, error2 = 0;
- xfs_inode_t *qip;
if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
return XFS_ERROR(EINVAL);
}
- if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
- error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
- if (!error) {
- error = xfs_truncate_file(mp, qip);
- IRELE(qip);
- }
- }
-
- if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
- mp->m_sb.sb_gquotino != NULLFSINO) {
- error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
- if (!error2) {
- error2 = xfs_truncate_file(mp, qip);
- IRELE(qip);
- }
- }
+ if (flags & XFS_DQ_USER)
+ error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
+ if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
+ error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
return error ? error : error2;
}
-
/*
* Switch on (a given) quota enforcement for a filesystem. This takes
* effect immediately.
@@ -417,12 +441,12 @@ xfs_qm_scall_getqstat(
}
if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0) == 0)
+ 0, 0, &uip) == 0)
tempuqip = B_TRUE;
}
if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0) == 0)
+ 0, 0, &gip) == 0)
tempgqip = B_TRUE;
}
if (uip) {
@@ -875,8 +899,9 @@ xfs_dqrele_inode(
xfs_qm_dqrele(ip->i_gdquot);
ip->i_gdquot = NULL;
}
- xfs_iput(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ IRELE(ip);
return 0;
}
@@ -1109,10 +1134,7 @@ xfs_qm_internalqcheck_adjust(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* not used */
int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
int *ubused, /* not used */
- void *dip, /* not used */
int *res) /* bulkstat result code */
{
xfs_inode_t *ip;
@@ -1134,7 +1156,7 @@ xfs_qm_internalqcheck_adjust(
ipreleased = B_FALSE;
again:
lock_flags = XFS_ILOCK_SHARED;
- if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) {
+ if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
*res = BULKSTAT_RV_NOTHING;
return (error);
}
@@ -1146,7 +1168,8 @@ xfs_qm_internalqcheck_adjust(
* of those now.
*/
if (! ipreleased) {
- xfs_iput(ip, lock_flags);
+ xfs_iunlock(ip, lock_flags);
+ IRELE(ip);
ipreleased = B_TRUE;
goto again;
}
@@ -1163,7 +1186,8 @@ xfs_qm_internalqcheck_adjust(
ASSERT(gd);
xfs_qm_internalqcheck_dqadjust(ip, gd);
}
- xfs_iput(ip, lock_flags);
+ xfs_iunlock(ip, lock_flags);
+ IRELE(ip);
*res = BULKSTAT_RV_DIDONE;
return (0);
}
@@ -1205,15 +1229,15 @@ xfs_qm_internalqcheck(
* Iterate thru all the inodes in the file system,
* adjusting the corresponding dquot counters
*/
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_internalqcheck_adjust, NULL,
- 0, NULL, BULKSTAT_FG_IGET, &done))) {
+ error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_internalqcheck_adjust,
+ 0, NULL, &done);
+ if (error) {
+ cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
break;
}
- } while (! done);
- if (error) {
- cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
- }
+ } while (!done);
+
cmn_err(CE_DEBUG, "Checking results against system dquots");
for (i = 0; i < qmtest_hashmask; i++) {
xfs_dqtest_t *d, *n;
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 061d827da33..7de91d1b75c 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -23,25 +23,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
#include "xfs_itable.h"
-#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
@@ -59,16 +49,14 @@ xfs_trans_dqjoin(
xfs_trans_t *tp,
xfs_dquot_t *dqp)
{
- xfs_dq_logitem_t *lp = &dqp->q_logitem;
-
ASSERT(dqp->q_transp != tp);
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(lp->qli_dquot == dqp);
+ ASSERT(dqp->q_logitem.qli_dquot == dqp);
/*
* Get a log_item_desc to point at the new item.
*/
- (void) xfs_trans_add_item(tp, (xfs_log_item_t*)(lp));
+ xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
/*
* Initialize i_transp so we can later determine if this dquot is
@@ -93,16 +81,11 @@ xfs_trans_log_dquot(
xfs_trans_t *tp,
xfs_dquot_t *dqp)
{
- xfs_log_item_desc_t *lidp;
-
ASSERT(dqp->q_transp == tp);
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
- ASSERT(lidp != NULL);
-
tp->t_flags |= XFS_TRANS_DIRTY;
- lidp->lid_flags |= XFS_LID_DIRTY;
+ dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
}
/*
@@ -874,9 +857,8 @@ xfs_trans_get_qoff_item(
/*
* Get a log_item_desc to point at the new item.
*/
- (void) xfs_trans_add_item(tp, (xfs_log_item_t*)q);
-
- return (q);
+ xfs_trans_add_item(tp, &q->qql_item);
+ return q;
}
@@ -890,13 +872,8 @@ xfs_trans_log_quotaoff_item(
xfs_trans_t *tp,
xfs_qoff_logitem_t *qlp)
{
- xfs_log_item_desc_t *lidp;
-
- lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)qlp);
- ASSERT(lidp != NULL);
-
tp->t_flags |= XFS_TRANS_DIRTY;
- lidp->lid_flags |= XFS_LID_DIRTY;
+ qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
}
STATIC void
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 3f3610a7ee0..975aa10e1a4 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -22,7 +22,6 @@
#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_ag.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_error.h"
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a7fbe8a99b1..af168faccc7 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -24,18 +24,13 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -688,8 +683,6 @@ xfs_alloc_ag_vextent_near(
xfs_agblock_t ltbno; /* start bno of left side entry */
xfs_agblock_t ltbnoa; /* aligned ... */
xfs_extlen_t ltdiff; /* difference to left side entry */
- /*REFERENCED*/
- xfs_agblock_t ltend; /* end bno of left side entry */
xfs_extlen_t ltlen; /* length of left side entry */
xfs_extlen_t ltlena; /* aligned ... */
xfs_agblock_t ltnew; /* useful start bno of left side */
@@ -814,8 +807,7 @@ xfs_alloc_ag_vextent_near(
if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
- ltend = ltbno + ltlen;
- ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+ ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
args->len = blen;
if (!xfs_alloc_fix_minleft(args)) {
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -828,7 +820,7 @@ xfs_alloc_ag_vextent_near(
*/
args->agbno = bnew;
ASSERT(bnew >= ltbno);
- ASSERT(bnew + blen <= ltend);
+ ASSERT(bnew + blen <= ltbno + ltlen);
/*
* Set up a cursor for the by-bno tree.
*/
@@ -1157,7 +1149,6 @@ xfs_alloc_ag_vextent_near(
/*
* Fix up the length and compute the useful address.
*/
- ltend = ltbno + ltlen;
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
xfs_alloc_fix_len(args);
if (!xfs_alloc_fix_minleft(args)) {
@@ -1170,7 +1161,7 @@ xfs_alloc_ag_vextent_near(
(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
ltlen, &ltnew);
ASSERT(ltnew >= ltbno);
- ASSERT(ltnew + rlen <= ltend);
+ ASSERT(ltnew + rlen <= ltbno + ltlen);
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
args->agbno = ltnew;
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 6d05199b667..895009a9727 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -27,16 +27,16 @@ struct xfs_busy_extent;
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
*/
-typedef enum xfs_alloctype
-{
- XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */
- XFS_ALLOCTYPE_FIRST_AG, /* ... start at ag 0 */
- XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */
- XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */
- XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */
- XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */
- XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */
-} xfs_alloctype_t;
+#define XFS_ALLOCTYPE_ANY_AG 0x01 /* allocate anywhere, use rotor */
+#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */
+#define XFS_ALLOCTYPE_START_AG 0x04 /* anywhere, start in this a.g. */
+#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */
+#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */
+#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */
+#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */
+
+/* this should become an enum again when the tracing code is fixed */
+typedef unsigned int xfs_alloctype_t;
#define XFS_ALLOC_TYPES \
{ XFS_ALLOCTYPE_ANY_AG, "ANY_AG" }, \
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 83f49421875..97f7328967f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -24,19 +24,14 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_btree_trace.h"
-#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index b9c196a53c4..c2568242a90 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -25,19 +25,13 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
-#include "xfs_btree.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
@@ -325,8 +319,7 @@ xfs_attr_set_int(
return (error);
}
- xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args.trans, dp);
+ xfs_trans_ijoin(args.trans, dp);
/*
* If the attribute list is non-existent or a shortform list,
@@ -396,10 +389,8 @@ xfs_attr_set_int(
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args.trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args.trans, dp);
/*
* Commit the leaf transformation. We'll need another (linked)
@@ -544,8 +535,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
* No need to make quota reservations here. We expect to release some
* blocks not allocate in the common case.
*/
- xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args.trans, dp);
+ xfs_trans_ijoin(args.trans, dp);
/*
* Decide on what work routines to call based on the inode size.
@@ -821,8 +811,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
* No need to make quota reservations here. We expect to release some
* blocks, not allocate, in the common case.
*/
- xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(trans, dp);
+ xfs_trans_ijoin(trans, dp);
/*
* Decide on what work routines to call based on the inode size.
@@ -981,10 +970,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
/*
* Commit the current trans (including the inode) and start
@@ -1085,10 +1072,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* and started a new one. We need the inode to be
* in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
} else
xfs_da_buf_done(bp);
@@ -1161,10 +1146,8 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
} else
xfs_da_buf_done(bp);
return(0);
@@ -1317,10 +1300,8 @@ restart:
* and started a new one. We need the inode to be
* in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
/*
* Commit the node conversion and start the next
@@ -1356,10 +1337,8 @@ restart:
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
} else {
/*
* Addition succeeded, update Btree hashvals.
@@ -1470,10 +1449,8 @@ restart:
* and started a new one. We need the inode to be
* in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
}
/*
@@ -1604,10 +1581,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
/*
* Commit the Btree join operation and start a new trans.
@@ -1658,10 +1633,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* and started a new one. We need the inode to be
* in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
} else
xfs_da_brelse(args->trans, bp);
}
@@ -2004,7 +1977,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- NULL, 0, map, &nmap, NULL, NULL);
+ NULL, 0, map, &nmap, NULL);
if (error)
return(error);
ASSERT(nmap >= 1);
@@ -2083,7 +2056,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
XFS_BMAPI_WRITE,
args->firstblock, args->total, &map, &nmap,
- args->flist, NULL);
+ args->flist);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
@@ -2099,10 +2072,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, dp);
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -2136,7 +2107,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, 0, &map, &nmap,
- NULL, NULL);
+ NULL);
if (error) {
return(error);
}
@@ -2201,7 +2172,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, 0, &map, &nmap,
- args->flist, NULL);
+ args->flist);
if (error) {
return(error);
}
@@ -2239,7 +2210,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
1, args->firstblock, args->flist,
- NULL, &done);
+ &done);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
@@ -2255,10 +2226,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
- if (committed) {
- xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL);
- xfs_trans_ihold(args->trans, args->dp);
- }
+ if (committed)
+ xfs_trans_ijoin(args->trans, args->dp);
/*
* Close out trans and start the next one in the chain.
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index a90ce74fc25..a6cff8edcdb 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -24,8 +24,6 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
@@ -33,7 +31,6 @@
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
@@ -2931,7 +2928,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
nmap = 1;
error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- NULL, 0, &map, &nmap, NULL, NULL);
+ NULL, 0, &map, &nmap, NULL);
if (error) {
return(error);
}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 99587ded043..23f14e595c1 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -30,13 +30,10 @@
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
-#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_dir2_data.h"
#include "xfs_dir2_leaf.h"
@@ -104,7 +101,6 @@ xfs_bmap_add_extent(
xfs_fsblock_t *first, /* pointer to firstblock variable */
xfs_bmap_free_t *flist, /* list of extents to be freed */
int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int whichfork, /* data or attr fork */
int rsvd); /* OK to allocate reserved blocks */
@@ -122,7 +118,6 @@ xfs_bmap_add_extent_delay_real(
xfs_fsblock_t *first, /* pointer to firstblock variable */
xfs_bmap_free_t *flist, /* list of extents to be freed */
int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int rsvd); /* OK to allocate reserved blocks */
/*
@@ -135,7 +130,6 @@ xfs_bmap_add_extent_hole_delay(
xfs_extnum_t idx, /* extent number to update/insert */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
int *logflagsp,/* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int rsvd); /* OK to allocate reserved blocks */
/*
@@ -149,7 +143,6 @@ xfs_bmap_add_extent_hole_real(
xfs_btree_cur_t *cur, /* if null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int whichfork); /* data or attr fork */
/*
@@ -162,8 +155,7 @@ xfs_bmap_add_extent_unwritten_real(
xfs_extnum_t idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
- int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta); /* Change made to incore extents */
+ int *logflagsp); /* inode logging flags */
/*
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
@@ -200,7 +192,6 @@ xfs_bmap_del_extent(
xfs_btree_cur_t *cur, /* if null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
int *logflagsp,/* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int whichfork, /* data or attr fork */
int rsvd); /* OK to allocate reserved blocks */
@@ -489,7 +480,6 @@ xfs_bmap_add_extent(
xfs_fsblock_t *first, /* pointer to firstblock variable */
xfs_bmap_free_t *flist, /* list of extents to be freed */
int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int whichfork, /* data or attr fork */
int rsvd) /* OK to use reserved data blocks */
{
@@ -524,15 +514,6 @@ xfs_bmap_add_extent(
logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else
logflags = 0;
- /* DELTA: single new extent */
- if (delta) {
- if (delta->xed_startoff > new->br_startoff)
- delta->xed_startoff = new->br_startoff;
- if (delta->xed_blockcount <
- new->br_startoff + new->br_blockcount)
- delta->xed_blockcount = new->br_startoff +
- new->br_blockcount;
- }
}
/*
* Any kind of new delayed allocation goes here.
@@ -542,7 +523,7 @@ xfs_bmap_add_extent(
ASSERT((cur->bc_private.b.flags &
XFS_BTCUR_BPRV_WASDEL) == 0);
if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
- &logflags, delta, rsvd)))
+ &logflags, rsvd)))
goto done;
}
/*
@@ -553,7 +534,7 @@ xfs_bmap_add_extent(
ASSERT((cur->bc_private.b.flags &
XFS_BTCUR_BPRV_WASDEL) == 0);
if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
- &logflags, delta, whichfork)))
+ &logflags, whichfork)))
goto done;
} else {
xfs_bmbt_irec_t prev; /* old extent at offset idx */
@@ -578,17 +559,17 @@ xfs_bmap_add_extent(
XFS_BTCUR_BPRV_WASDEL);
if ((error = xfs_bmap_add_extent_delay_real(ip,
idx, &cur, new, &da_new, first, flist,
- &logflags, delta, rsvd)))
+ &logflags, rsvd)))
goto done;
} else if (new->br_state == XFS_EXT_NORM) {
ASSERT(new->br_state == XFS_EXT_NORM);
if ((error = xfs_bmap_add_extent_unwritten_real(
- ip, idx, &cur, new, &logflags, delta)))
+ ip, idx, &cur, new, &logflags)))
goto done;
} else {
ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
if ((error = xfs_bmap_add_extent_unwritten_real(
- ip, idx, &cur, new, &logflags, delta)))
+ ip, idx, &cur, new, &logflags)))
goto done;
}
ASSERT(*curp == cur || *curp == NULL);
@@ -601,7 +582,7 @@ xfs_bmap_add_extent(
ASSERT((cur->bc_private.b.flags &
XFS_BTCUR_BPRV_WASDEL) == 0);
if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
- new, &logflags, delta, whichfork)))
+ new, &logflags, whichfork)))
goto done;
}
}
@@ -666,7 +647,6 @@ xfs_bmap_add_extent_delay_real(
xfs_fsblock_t *first, /* pointer to firstblock variable */
xfs_bmap_free_t *flist, /* list of extents to be freed */
int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int rsvd) /* OK to use reserved data block allocation */
{
xfs_btree_cur_t *cur; /* btree cursor */
@@ -797,11 +777,6 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
*dnew = 0;
- /* DELTA: Three in-core extents are replaced by one. */
- temp = LEFT.br_startoff;
- temp2 = LEFT.br_blockcount +
- PREV.br_blockcount +
- RIGHT.br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -832,10 +807,6 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
*dnew = 0;
- /* DELTA: Two in-core extents are replaced by one. */
- temp = LEFT.br_startoff;
- temp2 = LEFT.br_blockcount +
- PREV.br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -867,10 +838,6 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
*dnew = 0;
- /* DELTA: Two in-core extents are replaced by one. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount +
- RIGHT.br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -900,9 +867,6 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
*dnew = 0;
- /* DELTA: The in-core extent described by new changed type. */
- temp = new->br_startoff;
- temp2 = new->br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -942,10 +906,6 @@ xfs_bmap_add_extent_delay_real(
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
*dnew = temp;
- /* DELTA: The boundary between two in-core extents moved. */
- temp = LEFT.br_startoff;
- temp2 = LEFT.br_blockcount +
- PREV.br_blockcount;
break;
case BMAP_LEFT_FILLING:
@@ -990,9 +950,6 @@ xfs_bmap_add_extent_delay_real(
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_);
*dnew = temp;
- /* DELTA: One in-core extent is split in two. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount;
break;
case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1031,10 +988,6 @@ xfs_bmap_add_extent_delay_real(
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
*dnew = temp;
- /* DELTA: The boundary between two in-core extents moved. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount +
- RIGHT.br_blockcount;
break;
case BMAP_RIGHT_FILLING:
@@ -1078,9 +1031,6 @@ xfs_bmap_add_extent_delay_real(
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
*dnew = temp;
- /* DELTA: One in-core extent is split in two. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount;
break;
case 0:
@@ -1161,9 +1111,6 @@ xfs_bmap_add_extent_delay_real(
nullstartblock((int)temp2));
trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_);
*dnew = temp + temp2;
- /* DELTA: One in-core extent is split in three. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1179,13 +1126,6 @@ xfs_bmap_add_extent_delay_real(
ASSERT(0);
}
*curp = cur;
- if (delta) {
- temp2 += temp;
- if (delta->xed_startoff > temp)
- delta->xed_startoff = temp;
- if (delta->xed_blockcount < temp2)
- delta->xed_blockcount = temp2;
- }
done:
*logflagsp = rval;
return error;
@@ -1204,8 +1144,7 @@ xfs_bmap_add_extent_unwritten_real(
xfs_extnum_t idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
- int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta) /* Change made to incore extents */
+ int *logflagsp) /* inode logging flags */
{
xfs_btree_cur_t *cur; /* btree cursor */
xfs_bmbt_rec_host_t *ep; /* extent entry for idx */
@@ -1219,8 +1158,6 @@ xfs_bmap_add_extent_unwritten_real(
/* left is 0, right is 1, prev is 2 */
int rval=0; /* return value (logging flags) */
int state = 0;/* state bits, accessed thru macros */
- xfs_filblks_t temp=0;
- xfs_filblks_t temp2=0;
#define LEFT r[0]
#define RIGHT r[1]
@@ -1341,11 +1278,6 @@ xfs_bmap_add_extent_unwritten_real(
RIGHT.br_blockcount, LEFT.br_state)))
goto done;
}
- /* DELTA: Three in-core extents are replaced by one. */
- temp = LEFT.br_startoff;
- temp2 = LEFT.br_blockcount +
- PREV.br_blockcount +
- RIGHT.br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1382,10 +1314,6 @@ xfs_bmap_add_extent_unwritten_real(
LEFT.br_state)))
goto done;
}
- /* DELTA: Two in-core extents are replaced by one. */
- temp = LEFT.br_startoff;
- temp2 = LEFT.br_blockcount +
- PREV.br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1422,10 +1350,6 @@ xfs_bmap_add_extent_unwritten_real(
newext)))
goto done;
}
- /* DELTA: Two in-core extents are replaced by one. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount +
- RIGHT.br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1453,9 +1377,6 @@ xfs_bmap_add_extent_unwritten_real(
newext)))
goto done;
}
- /* DELTA: The in-core extent described by new changed type. */
- temp = new->br_startoff;
- temp2 = new->br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1501,10 +1422,6 @@ xfs_bmap_add_extent_unwritten_real(
LEFT.br_state))
goto done;
}
- /* DELTA: The boundary between two in-core extents moved. */
- temp = LEFT.br_startoff;
- temp2 = LEFT.br_blockcount +
- PREV.br_blockcount;
break;
case BMAP_LEFT_FILLING:
@@ -1544,9 +1461,6 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- /* DELTA: One in-core extent is split in two. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount;
break;
case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1587,10 +1501,6 @@ xfs_bmap_add_extent_unwritten_real(
newext)))
goto done;
}
- /* DELTA: The boundary between two in-core extents moved. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount +
- RIGHT.br_blockcount;
break;
case BMAP_RIGHT_FILLING:
@@ -1630,9 +1540,6 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- /* DELTA: One in-core extent is split in two. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount;
break;
case 0:
@@ -1692,9 +1599,6 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- /* DELTA: One in-core extent is split in three. */
- temp = PREV.br_startoff;
- temp2 = PREV.br_blockcount;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1710,13 +1614,6 @@ xfs_bmap_add_extent_unwritten_real(
ASSERT(0);
}
*curp = cur;
- if (delta) {
- temp2 += temp;
- if (delta->xed_startoff > temp)
- delta->xed_startoff = temp;
- if (delta->xed_blockcount < temp2)
- delta->xed_blockcount = temp2;
- }
done:
*logflagsp = rval;
return error;
@@ -1736,7 +1633,6 @@ xfs_bmap_add_extent_hole_delay(
xfs_extnum_t idx, /* extent number to update/insert */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int rsvd) /* OK to allocate reserved blocks */
{
xfs_bmbt_rec_host_t *ep; /* extent record for idx */
@@ -1747,7 +1643,6 @@ xfs_bmap_add_extent_hole_delay(
xfs_bmbt_irec_t right; /* right neighbor extent entry */
int state; /* state bits, accessed thru macros */
xfs_filblks_t temp=0; /* temp for indirect calculations */
- xfs_filblks_t temp2=0;
ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
ep = xfs_iext_get_ext(ifp, idx);
@@ -1819,9 +1714,6 @@ xfs_bmap_add_extent_hole_delay(
xfs_iext_remove(ip, idx, 1, state);
ip->i_df.if_lastex = idx - 1;
- /* DELTA: Two in-core extents were replaced by one. */
- temp2 = temp;
- temp = left.br_startoff;
break;
case BMAP_LEFT_CONTIG:
@@ -1841,9 +1733,6 @@ xfs_bmap_add_extent_hole_delay(
trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_);
ip->i_df.if_lastex = idx - 1;
- /* DELTA: One in-core extent grew into a hole. */
- temp2 = temp;
- temp = left.br_startoff;
break;
case BMAP_RIGHT_CONTIG:
@@ -1862,9 +1751,6 @@ xfs_bmap_add_extent_hole_delay(
trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_);
ip->i_df.if_lastex = idx;
- /* DELTA: One in-core extent grew into a hole. */
- temp2 = temp;
- temp = new->br_startoff;
break;
case 0:
@@ -1876,9 +1762,6 @@ xfs_bmap_add_extent_hole_delay(
oldlen = newlen = 0;
xfs_iext_insert(ip, idx, 1, new, state);
ip->i_df.if_lastex = idx;
- /* DELTA: A new in-core extent was added in a hole. */
- temp2 = new->br_blockcount;
- temp = new->br_startoff;
break;
}
if (oldlen != newlen) {
@@ -1889,13 +1772,6 @@ xfs_bmap_add_extent_hole_delay(
* Nothing to do for disk quota accounting here.
*/
}
- if (delta) {
- temp2 += temp;
- if (delta->xed_startoff > temp)
- delta->xed_startoff = temp;
- if (delta->xed_blockcount < temp2)
- delta->xed_blockcount = temp2;
- }
*logflagsp = 0;
return 0;
}
@@ -1911,7 +1787,6 @@ xfs_bmap_add_extent_hole_real(
xfs_btree_cur_t *cur, /* if null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int whichfork) /* data or attr fork */
{
xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */
@@ -1922,8 +1797,6 @@ xfs_bmap_add_extent_hole_real(
xfs_bmbt_irec_t right; /* right neighbor extent entry */
int rval=0; /* return value (logging flags) */
int state; /* state bits, accessed thru macros */
- xfs_filblks_t temp=0;
- xfs_filblks_t temp2=0;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
@@ -2020,11 +1893,6 @@ xfs_bmap_add_extent_hole_real(
left.br_state)))
goto done;
}
- /* DELTA: Two in-core extents were replaced by one. */
- temp = left.br_startoff;
- temp2 = left.br_blockcount +
- new->br_blockcount +
- right.br_blockcount;
break;
case BMAP_LEFT_CONTIG:
@@ -2056,10 +1924,6 @@ xfs_bmap_add_extent_hole_real(
left.br_state)))
goto done;
}
- /* DELTA: One in-core extent grew. */
- temp = left.br_startoff;
- temp2 = left.br_blockcount +
- new->br_blockcount;
break;
case BMAP_RIGHT_CONTIG:
@@ -2092,10 +1956,6 @@ xfs_bmap_add_extent_hole_real(
right.br_state)))
goto done;
}
- /* DELTA: One in-core extent grew. */
- temp = new->br_startoff;
- temp2 = new->br_blockcount +
- right.br_blockcount;
break;
case 0:
@@ -2123,18 +1983,8 @@ xfs_bmap_add_extent_hole_real(
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- /* DELTA: A new extent was added in a hole. */
- temp = new->br_startoff;
- temp2 = new->br_blockcount;
break;
}
- if (delta) {
- temp2 += temp;
- if (delta->xed_startoff > temp)
- delta->xed_startoff = temp;
- if (delta->xed_blockcount < temp2)
- delta->xed_blockcount = temp2;
- }
done:
*logflagsp = rval;
return error;
@@ -2959,7 +2809,6 @@ xfs_bmap_del_extent(
xfs_btree_cur_t *cur, /* if null, not a btree */
xfs_bmbt_irec_t *del, /* data to remove from extents */
int *logflagsp, /* inode logging flags */
- xfs_extdelta_t *delta, /* Change made to incore extents */
int whichfork, /* data or attr fork */
int rsvd) /* OK to allocate reserved blocks */
{
@@ -3265,14 +3114,6 @@ xfs_bmap_del_extent(
if (da_old > da_new)
xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new),
rsvd);
- if (delta) {
- /* DELTA: report the original extent. */
- if (delta->xed_startoff > got.br_startoff)
- delta->xed_startoff = got.br_startoff;
- if (delta->xed_blockcount < got.br_startoff+got.br_blockcount)
- delta->xed_blockcount = got.br_startoff +
- got.br_blockcount;
- }
done:
*logflagsp = flags;
return error;
@@ -3754,9 +3595,10 @@ xfs_bmap_add_attrfork(
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
}
ASSERT(ip->i_d.di_anextents == 0);
- IHOLD(ip);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+ xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
switch (ip->i_d.di_format) {
case XFS_DINODE_FMT_DEV:
ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
@@ -4483,8 +4325,7 @@ xfs_bmapi(
xfs_extlen_t total, /* total blocks needed */
xfs_bmbt_irec_t *mval, /* output: map values */
int *nmap, /* i/o: mval size/count */
- xfs_bmap_free_t *flist, /* i/o: list extents to free */
- xfs_extdelta_t *delta) /* o: change made to incore extents */
+ xfs_bmap_free_t *flist) /* i/o: list extents to free */
{
xfs_fsblock_t abno; /* allocated block number */
xfs_extlen_t alen; /* allocated extent length */
@@ -4596,10 +4437,7 @@ xfs_bmapi(
end = bno + len;
obno = bno;
bma.ip = NULL;
- if (delta) {
- delta->xed_startoff = NULLFILEOFF;
- delta->xed_blockcount = 0;
- }
+
while (bno < end && n < *nmap) {
/*
* Reading past eof, act as though there's a hole
@@ -4620,19 +4458,13 @@ xfs_bmapi(
* allocate the stuff asked for in this bmap call
* but that wouldn't be as good.
*/
- if (wasdelay && !(flags & XFS_BMAPI_EXACT)) {
+ if (wasdelay) {
alen = (xfs_extlen_t)got.br_blockcount;
aoff = got.br_startoff;
if (lastx != NULLEXTNUM && lastx) {
ep = xfs_iext_get_ext(ifp, lastx - 1);
xfs_bmbt_get_all(ep, &prev);
}
- } else if (wasdelay) {
- alen = (xfs_extlen_t)
- XFS_FILBLKS_MIN(len,
- (got.br_startoff +
- got.br_blockcount) - bno);
- aoff = bno;
} else {
alen = (xfs_extlen_t)
XFS_FILBLKS_MIN(len, MAXEXTLEN);
@@ -4831,7 +4663,7 @@ xfs_bmapi(
got.br_state = XFS_EXT_UNWRITTEN;
}
error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
- firstblock, flist, &tmp_logflags, delta,
+ firstblock, flist, &tmp_logflags,
whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
logflags |= tmp_logflags;
if (error)
@@ -4927,7 +4759,7 @@ xfs_bmapi(
}
mval->br_state = XFS_EXT_NORM;
error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
- firstblock, flist, &tmp_logflags, delta,
+ firstblock, flist, &tmp_logflags,
whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
logflags |= tmp_logflags;
if (error)
@@ -5017,14 +4849,6 @@ xfs_bmapi(
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
error = 0;
- if (delta && delta->xed_startoff != NULLFILEOFF) {
- /* A change was actually made.
- * Note that delta->xed_blockount is an offset at this
- * point and needs to be converted to a block count.
- */
- ASSERT(delta->xed_blockcount > delta->xed_startoff);
- delta->xed_blockcount -= delta->xed_startoff;
- }
error0:
/*
* Log everything. Do this after conversion, there's no point in
@@ -5136,8 +4960,6 @@ xfs_bunmapi(
xfs_fsblock_t *firstblock, /* first allocated block
controls a.g. for allocs */
xfs_bmap_free_t *flist, /* i/o: list extents to free */
- xfs_extdelta_t *delta, /* o: change made to incore
- extents */
int *done) /* set if not done yet */
{
xfs_btree_cur_t *cur; /* bmap btree cursor */
@@ -5196,10 +5018,7 @@ xfs_bunmapi(
bno = start + len - 1;
ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
&prev);
- if (delta) {
- delta->xed_startoff = NULLFILEOFF;
- delta->xed_blockcount = 0;
- }
+
/*
* Check to see if the given block number is past the end of the
* file, back up to the last block if so...
@@ -5297,7 +5116,7 @@ xfs_bunmapi(
}
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
- firstblock, flist, &logflags, delta,
+ firstblock, flist, &logflags,
XFS_DATA_FORK, 0);
if (error)
goto error0;
@@ -5352,7 +5171,7 @@ xfs_bunmapi(
prev.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
&prev, firstblock, flist, &logflags,
- delta, XFS_DATA_FORK, 0);
+ XFS_DATA_FORK, 0);
if (error)
goto error0;
goto nodelete;
@@ -5361,7 +5180,7 @@ xfs_bunmapi(
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent(ip, lastx, &cur,
&del, firstblock, flist, &logflags,
- delta, XFS_DATA_FORK, 0);
+ XFS_DATA_FORK, 0);
if (error)
goto error0;
goto nodelete;
@@ -5414,7 +5233,7 @@ xfs_bunmapi(
goto error0;
}
error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
- &tmp_logflags, delta, whichfork, rsvd);
+ &tmp_logflags, whichfork, rsvd);
logflags |= tmp_logflags;
if (error)
goto error0;
@@ -5471,14 +5290,6 @@ nodelete:
ASSERT(ifp->if_ext_max ==
XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
error = 0;
- if (delta && delta->xed_startoff != NULLFILEOFF) {
- /* A change was actually made.
- * Note that delta->xed_blockount is an offset at this
- * point and needs to be converted to a block count.
- */
- ASSERT(delta->xed_blockcount > delta->xed_startoff);
- delta->xed_blockcount -= delta->xed_startoff;
- }
error0:
/*
* Log everything. Do this after conversion, there's no point in
@@ -5605,28 +5416,6 @@ xfs_getbmap(
prealloced = 0;
fixlen = 1LL << 32;
} else {
- /*
- * If the BMV_IF_NO_DMAPI_READ interface bit specified, do
- * not generate a DMAPI read event. Otherwise, if the
- * DM_EVENT_READ bit is set for the file, generate a read
- * event in order that the DMAPI application may do its thing
- * before we return the extents. Usually this means restoring
- * user file data to regions of the file that look like holes.
- *
- * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
- * BMV_IF_NO_DMAPI_READ so that read events are generated.
- * If this were not true, callers of ioctl(XFS_IOC_GETBMAP)
- * could misinterpret holes in a DMAPI file as true holes,
- * when in fact they may represent offline user data.
- */
- if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
- !(iflags & BMV_IF_NO_DMAPI_READ)) {
- error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip,
- 0, 0, 0, NULL);
- if (error)
- return XFS_ERROR(error);
- }
-
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
@@ -5713,7 +5502,7 @@ xfs_getbmap(
error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
XFS_BB_TO_FSB(mp, bmv->bmv_length),
bmapi_flags, NULL, 0, map, &nmap,
- NULL, NULL);
+ NULL);
if (error)
goto out_free_map;
ASSERT(nmap <= subnex);
@@ -5859,66 +5648,34 @@ xfs_bmap_eof(
}
#ifdef DEBUG
-STATIC
-xfs_buf_t *
+STATIC struct xfs_buf *
xfs_bmap_get_bp(
- xfs_btree_cur_t *cur,
+ struct xfs_btree_cur *cur,
xfs_fsblock_t bno)
{
- int i;
- xfs_buf_t *bp;
+ struct xfs_log_item_desc *lidp;
+ int i;
if (!cur)
- return(NULL);
-
- bp = NULL;
- for(i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
- bp = cur->bc_bufs[i];
- if (!bp) break;
- if (XFS_BUF_ADDR(bp) == bno)
- break; /* Found it */
- }
- if (i == XFS_BTREE_MAXLEVELS)
- bp = NULL;
-
- if (!bp) { /* Chase down all the log items to see if the bp is there */
- xfs_log_item_chunk_t *licp;
- xfs_trans_t *tp;
-
- tp = cur->bc_tp;
- licp = &tp->t_items;
- while (!bp && licp != NULL) {
- if (xfs_lic_are_all_free(licp)) {
- licp = licp->lic_next;
- continue;
- }
- for (i = 0; i < licp->lic_unused; i++) {
- xfs_log_item_desc_t *lidp;
- xfs_log_item_t *lip;
- xfs_buf_log_item_t *bip;
- xfs_buf_t *lbp;
-
- if (xfs_lic_isfree(licp, i)) {
- continue;
- }
-
- lidp = xfs_lic_slot(licp, i);
- lip = lidp->lid_item;
- if (lip->li_type != XFS_LI_BUF)
- continue;
+ return NULL;
- bip = (xfs_buf_log_item_t *)lip;
- lbp = bip->bli_buf;
+ for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
+ if (!cur->bc_bufs[i])
+ break;
+ if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
+ return cur->bc_bufs[i];
+ }
- if (XFS_BUF_ADDR(lbp) == bno) {
- bp = lbp;
- break; /* Found it */
- }
- }
- licp = licp->lic_next;
- }
+ /* Chase down all the log items to see if the bp is there */
+ list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
+ struct xfs_buf_log_item *bip;
+ bip = (struct xfs_buf_log_item *)lidp->lid_item;
+ if (bip->bli_item.li_type == XFS_LI_BUF &&
+ XFS_BUF_ADDR(bip->bli_buf) == bno)
+ return bip->bli_buf;
}
- return(bp);
+
+ return NULL;
}
STATIC void
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 419dafb9d87..b13569a6179 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -28,20 +28,6 @@ struct xfs_trans;
extern kmem_zone_t *xfs_bmap_free_item_zone;
/*
- * DELTA: describe a change to the in-core extent list.
- *
- * Internally the use of xed_blockount is somewhat funky.
- * xed_blockcount contains an offset much of the time because this
- * makes merging changes easier. (xfs_fileoff_t and xfs_filblks_t are
- * the same underlying type).
- */
-typedef struct xfs_extdelta
-{
- xfs_fileoff_t xed_startoff; /* offset of range */
- xfs_filblks_t xed_blockcount; /* blocks in range */
-} xfs_extdelta_t;
-
-/*
* List of extents to be free "later".
* The list is kept sorted on xbf_startblock.
*/
@@ -82,16 +68,13 @@ typedef struct xfs_bmap_free
#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */
#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */
#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */
-#define XFS_BMAPI_EXACT 0x010 /* allocate only to spec'd bounds */
-#define XFS_BMAPI_ATTRFORK 0x020 /* use attribute fork not data */
-#define XFS_BMAPI_ASYNC 0x040 /* bunmapi xactions can be async */
-#define XFS_BMAPI_RSVBLOCKS 0x080 /* OK to alloc. reserved data blocks */
-#define XFS_BMAPI_PREALLOC 0x100 /* preallocation op: unwritten space */
-#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */
+#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */
+#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */
+#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */
+#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */
/* combine contig. space */
-#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */
-/* XFS_BMAPI_DIRECT_IO 0x800 */
-#define XFS_BMAPI_CONVERT 0x1000 /* unwritten extent conversion - */
+#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */
+#define XFS_BMAPI_CONVERT 0x200 /* unwritten extent conversion - */
/* need write cache flushing and no */
/* additional allocation alignments */
@@ -100,9 +83,7 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_DELAY, "DELAY" }, \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
- { XFS_BMAPI_EXACT, "EXACT" }, \
{ XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
- { XFS_BMAPI_ASYNC, "ASYNC" }, \
{ XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
@@ -310,9 +291,7 @@ xfs_bmapi(
xfs_extlen_t total, /* total blocks needed */
struct xfs_bmbt_irec *mval, /* output: map values */
int *nmap, /* i/o: mval size/count */
- xfs_bmap_free_t *flist, /* i/o: list extents to free */
- xfs_extdelta_t *delta); /* o: change made to incore
- extents */
+ xfs_bmap_free_t *flist); /* i/o: list extents to free */
/*
* Map file blocks to filesystem blocks, simple version.
@@ -346,8 +325,6 @@ xfs_bunmapi(
xfs_fsblock_t *firstblock, /* first allocated block
controls a.g. for allocs */
xfs_bmap_free_t *flist, /* i/o: list extents to free */
- xfs_extdelta_t *delta, /* o: change made to incore
- extents */
int *done); /* set if not done yet */
/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 416e47e54b8..87d3c10b695 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -24,21 +24,16 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_trace.h"
-#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 96be4b0f249..829af92f0fb 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -24,20 +24,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
#include "xfs_btree_trace.h"
-#include "xfs_ialloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 02a80984aa0..1b09d7a280d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -24,7 +24,6 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
@@ -34,6 +33,12 @@
kmem_zone_t *xfs_buf_item_zone;
+static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_buf_log_item, bli_item);
+}
+
+
#ifdef XFS_TRANS_DEBUG
/*
* This function uses an alternate strategy for tracking the bytes
@@ -151,12 +156,13 @@ STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
*/
STATIC uint
xfs_buf_item_size(
- xfs_buf_log_item_t *bip)
+ struct xfs_log_item *lip)
{
- uint nvecs;
- int next_bit;
- int last_bit;
- xfs_buf_t *bp;
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
+ uint nvecs;
+ int next_bit;
+ int last_bit;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
if (bip->bli_flags & XFS_BLI_STALE) {
@@ -170,7 +176,6 @@ xfs_buf_item_size(
return 1;
}
- bp = bip->bli_buf;
ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
nvecs = 1;
last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
@@ -219,13 +224,13 @@ xfs_buf_item_size(
*/
STATIC void
xfs_buf_item_format(
- xfs_buf_log_item_t *bip,
- xfs_log_iovec_t *log_vector)
+ struct xfs_log_item *lip,
+ struct xfs_log_iovec *vecp)
{
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
uint base_size;
uint nvecs;
- xfs_log_iovec_t *vecp;
- xfs_buf_t *bp;
int first_bit;
int last_bit;
int next_bit;
@@ -235,8 +240,6 @@ xfs_buf_item_format(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
(bip->bli_flags & XFS_BLI_STALE));
- bp = bip->bli_buf;
- vecp = log_vector;
/*
* The size of the base structure is the size of the
@@ -248,7 +251,7 @@ xfs_buf_item_format(
base_size =
(uint)(sizeof(xfs_buf_log_format_t) +
((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
- vecp->i_addr = (xfs_caddr_t)&bip->bli_format;
+ vecp->i_addr = &bip->bli_format;
vecp->i_len = base_size;
vecp->i_type = XLOG_REG_TYPE_BFORMAT;
vecp++;
@@ -263,7 +266,7 @@ xfs_buf_item_format(
*/
if (bip->bli_flags & XFS_BLI_INODE_BUF) {
if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
- xfs_log_item_in_current_chkpt(&bip->bli_item)))
+ xfs_log_item_in_current_chkpt(lip)))
bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
bip->bli_flags &= ~XFS_BLI_INODE_BUF;
}
@@ -356,66 +359,90 @@ xfs_buf_item_format(
/*
* This is called to pin the buffer associated with the buf log item in memory
- * so it cannot be written out. Simply call bpin() on the buffer to do this.
+ * so it cannot be written out.
*
* We also always take a reference to the buffer log item here so that the bli
* is held while the item is pinned in memory. This means that we can
* unconditionally drop the reference count a transaction holds when the
* transaction is completed.
*/
-
STATIC void
xfs_buf_item_pin(
- xfs_buf_log_item_t *bip)
+ struct xfs_log_item *lip)
{
- xfs_buf_t *bp;
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
- bp = bip->bli_buf;
- ASSERT(XFS_BUF_ISBUSY(bp));
+ ASSERT(XFS_BUF_ISBUSY(bip->bli_buf));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
(bip->bli_flags & XFS_BLI_STALE));
- atomic_inc(&bip->bli_refcount);
+
trace_xfs_buf_item_pin(bip);
- xfs_bpin(bp);
-}
+ atomic_inc(&bip->bli_refcount);
+ atomic_inc(&bip->bli_buf->b_pin_count);
+}
/*
* This is called to unpin the buffer associated with the buf log
* item which was previously pinned with a call to xfs_buf_item_pin().
- * Just call bunpin() on the buffer to do this.
*
* Also drop the reference to the buf item for the current transaction.
* If the XFS_BLI_STALE flag is set and we are the last reference,
* then free up the buf log item and unlock the buffer.
+ *
+ * If the remove flag is set we are called from uncommit in the
+ * forced-shutdown path. If that is true and the reference count on
+ * the log item is going to drop to zero we need to free the item's
+ * descriptor in the transaction.
*/
STATIC void
xfs_buf_item_unpin(
- xfs_buf_log_item_t *bip)
+ struct xfs_log_item *lip,
+ int remove)
{
- struct xfs_ail *ailp;
- xfs_buf_t *bp;
- int freed;
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ xfs_buf_t *bp = bip->bli_buf;
+ struct xfs_ail *ailp = lip->li_ailp;
int stale = bip->bli_flags & XFS_BLI_STALE;
+ int freed;
- bp = bip->bli_buf;
- ASSERT(bp != NULL);
ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
+
trace_xfs_buf_item_unpin(bip);
freed = atomic_dec_and_test(&bip->bli_refcount);
- ailp = bip->bli_item.li_ailp;
- xfs_bunpin(bp);
+
+ if (atomic_dec_and_test(&bp->b_pin_count))
+ wake_up_all(&bp->b_waiters);
+
if (freed && stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
ASSERT(XFS_BUF_ISSTALE(bp));
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+
trace_xfs_buf_item_unpin_stale(bip);
+ if (remove) {
+ /*
+ * We have to remove the log item from the transaction
+ * as we are about to release our reference to the
+ * buffer. If we don't, the unlock that occurs later
+ * in xfs_trans_uncommit() will ry to reference the
+ * buffer which we no longer have a hold on.
+ */
+ xfs_trans_del_item(lip);
+
+ /*
+ * Since the transaction no longer refers to the buffer,
+ * the buffer should no longer refer to the transaction.
+ */
+ XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+ }
+
/*
* If we get called here because of an IO error, we may
* or may not have the item on the AIL. xfs_trans_ail_delete()
@@ -437,48 +464,6 @@ xfs_buf_item_unpin(
}
/*
- * this is called from uncommit in the forced-shutdown path.
- * we need to check to see if the reference count on the log item
- * is going to drop to zero. If so, unpin will free the log item
- * so we need to free the item's descriptor (that points to the item)
- * in the transaction.
- */
-STATIC void
-xfs_buf_item_unpin_remove(
- xfs_buf_log_item_t *bip,
- xfs_trans_t *tp)
-{
- /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */
- if ((atomic_read(&bip->bli_refcount) == 1) &&
- (bip->bli_flags & XFS_BLI_STALE)) {
- /*
- * yes -- We can safely do some work here and then call
- * buf_item_unpin to do the rest because we are
- * are holding the buffer locked so no one else will be
- * able to bump up the refcount. We have to remove the
- * log item from the transaction as we are about to release
- * our reference to the buffer. If we don't, the unlock that
- * occurs later in the xfs_trans_uncommit() will try to
- * reference the buffer which we no longer have a hold on.
- */
- struct xfs_log_item_desc *lidp;
-
- ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
- trace_xfs_buf_item_unpin_stale(bip);
-
- lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
- xfs_trans_free_item(tp, lidp);
-
- /*
- * Since the transaction no longer refers to the buffer, the
- * buffer should no longer refer to the transaction.
- */
- XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
- }
- xfs_buf_item_unpin(bip);
-}
-
-/*
* This is called to attempt to lock the buffer associated with this
* buf log item. Don't sleep on the buffer lock. If we can't get
* the lock right away, return 0. If we can get the lock, take a
@@ -488,11 +473,11 @@ xfs_buf_item_unpin_remove(
*/
STATIC uint
xfs_buf_item_trylock(
- xfs_buf_log_item_t *bip)
+ struct xfs_log_item *lip)
{
- xfs_buf_t *bp;
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
- bp = bip->bli_buf;
if (XFS_BUF_ISPINNED(bp))
return XFS_ITEM_PINNED;
if (!XFS_BUF_CPSEMA(bp))
@@ -529,13 +514,12 @@ xfs_buf_item_trylock(
*/
STATIC void
xfs_buf_item_unlock(
- xfs_buf_log_item_t *bip)
+ struct xfs_log_item *lip)
{
- int aborted;
- xfs_buf_t *bp;
- uint hold;
-
- bp = bip->bli_buf;
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
+ int aborted;
+ uint hold;
/* Clear the buffer's association with this transaction. */
XFS_BUF_SET_FSPRIVATE2(bp, NULL);
@@ -546,7 +530,7 @@ xfs_buf_item_unlock(
* (cancelled) buffers at unpin time, but we'll never go through the
* pin/unpin cycle if we abort inside commit.
*/
- aborted = (bip->bli_item.li_flags & XFS_LI_ABORTED) != 0;
+ aborted = (lip->li_flags & XFS_LI_ABORTED) != 0;
/*
* Before possibly freeing the buf item, determine if we should
@@ -607,16 +591,16 @@ xfs_buf_item_unlock(
*/
STATIC xfs_lsn_t
xfs_buf_item_committed(
- xfs_buf_log_item_t *bip,
+ struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+
trace_xfs_buf_item_committed(bip);
- if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
- (bip->bli_item.li_lsn != 0)) {
- return bip->bli_item.li_lsn;
- }
- return (lsn);
+ if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0)
+ return lip->li_lsn;
+ return lsn;
}
/*
@@ -626,15 +610,16 @@ xfs_buf_item_committed(
*/
STATIC void
xfs_buf_item_push(
- xfs_buf_log_item_t *bip)
+ struct xfs_log_item *lip)
{
- xfs_buf_t *bp;
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+ ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
+
trace_xfs_buf_item_push(bip);
- bp = bip->bli_buf;
- ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
xfs_buf_relse(bp);
}
@@ -646,22 +631,24 @@ xfs_buf_item_push(
*/
STATIC void
xfs_buf_item_pushbuf(
- xfs_buf_log_item_t *bip)
+ struct xfs_log_item *lip)
{
- xfs_buf_t *bp;
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+ ASSERT(XFS_BUF_ISDELAYWRITE(bp));
+
trace_xfs_buf_item_pushbuf(bip);
- bp = bip->bli_buf;
- ASSERT(XFS_BUF_ISDELAYWRITE(bp));
xfs_buf_delwri_promote(bp);
xfs_buf_relse(bp);
}
-/* ARGSUSED */
STATIC void
-xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
+xfs_buf_item_committing(
+ struct xfs_log_item *lip,
+ xfs_lsn_t commit_lsn)
{
}
@@ -669,21 +656,16 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
* This is the ops vector shared by all buf log items.
*/
static struct xfs_item_ops xfs_buf_item_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_buf_item_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin,
- .iop_unpin = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
- xfs_buf_item_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_buf_item_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_buf_item_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
- .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_buf_item_committing
+ .iop_size = xfs_buf_item_size,
+ .iop_format = xfs_buf_item_format,
+ .iop_pin = xfs_buf_item_pin,
+ .iop_unpin = xfs_buf_item_unpin,
+ .iop_trylock = xfs_buf_item_trylock,
+ .iop_unlock = xfs_buf_item_unlock,
+ .iop_committed = xfs_buf_item_committed,
+ .iop_push = xfs_buf_item_push,
+ .iop_pushbuf = xfs_buf_item_pushbuf,
+ .iop_committing = xfs_buf_item_committing
};
@@ -712,7 +694,6 @@ xfs_buf_item_init(
*/
if (bp->b_mount != mp)
bp->b_mount = mp;
- XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
if (lip->li_type == XFS_LI_BUF) {
@@ -1098,15 +1079,14 @@ xfs_buf_error_relse(
* It is called by xfs_buf_iodone_callbacks() above which will take
* care of cleaning up the buffer itself.
*/
-/* ARGSUSED */
void
xfs_buf_iodone(
- xfs_buf_t *bp,
- xfs_buf_log_item_t *bip)
+ struct xfs_buf *bp,
+ struct xfs_log_item *lip)
{
- struct xfs_ail *ailp = bip->bli_item.li_ailp;
+ struct xfs_ail *ailp = lip->li_ailp;
- ASSERT(bip->bli_buf == bp);
+ ASSERT(BUF_ITEM(lip)->bli_buf == bp);
xfs_buf_rele(bp);
@@ -1120,6 +1100,6 @@ xfs_buf_iodone(
* Either way, AIL is useless if we're forcing a shutdown.
*/
spin_lock(&ailp->xa_lock);
- xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
- xfs_buf_item_free(bip);
+ xfs_trans_ail_delete(ailp, lip);
+ xfs_buf_item_free(BUF_ITEM(lip));
}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index f20bb472d58..0e2ed43f16c 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -124,7 +124,7 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
void(*)(struct xfs_buf *, xfs_log_item_t *),
xfs_log_item_t *);
void xfs_buf_iodone_callbacks(struct xfs_buf *);
-void xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *);
+void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
#ifdef XFS_TRANS_DEBUG
void
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 0ca556b4bf3..30fa0e206fb 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -25,19 +25,14 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
-#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
@@ -581,16 +576,14 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
xfs_da_intnode_t *node;
xfs_da_node_entry_t *btree;
int tmp;
- xfs_mount_t *mp;
node = oldblk->bp->data;
- mp = state->mp;
ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
ASSERT(newblk->blkno != 0);
if (state->args->whichfork == XFS_DATA_FORK)
- ASSERT(newblk->blkno >= mp->m_dirleafblk &&
- newblk->blkno < mp->m_dirfreeblk);
+ ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
+ newblk->blkno < state->mp->m_dirfreeblk);
/*
* We may need to make some room before we insert the new node.
@@ -1601,7 +1594,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
XFS_BMAPI_CONTIG,
args->firstblock, args->total, &map, &nmap,
- args->flist, NULL))) {
+ args->flist))) {
return error;
}
ASSERT(nmap <= 1);
@@ -1622,8 +1615,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
XFS_BMAPI_METADATA,
args->firstblock, args->total,
- &mapp[mapi], &nmap, args->flist,
- NULL))) {
+ &mapp[mapi], &nmap, args->flist))) {
kmem_free(mapp);
return error;
}
@@ -1884,7 +1876,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
*/
if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
- 0, args->firstblock, args->flist, NULL,
+ 0, args->firstblock, args->flist,
&done)) == ENOSPC) {
if (w != XFS_DATA_FORK)
break;
@@ -1989,7 +1981,7 @@ xfs_da_do_buf(
nfsb,
XFS_BMAPI_METADATA |
xfs_bmapi_aflag(whichfork),
- NULL, 0, mapp, &nmap, NULL, NULL)))
+ NULL, 0, mapp, &nmap, NULL)))
goto exit0;
}
} else {
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5bba29a0781..3b9582c60a2 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -24,24 +24,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_dfrag.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
@@ -69,7 +60,9 @@ xfs_swapext(
goto out;
}
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) {
+ if (!(file->f_mode & FMODE_WRITE) ||
+ !(file->f_mode & FMODE_READ) ||
+ (file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_file;
}
@@ -81,6 +74,7 @@ xfs_swapext(
}
if (!(tmp_file->f_mode & FMODE_WRITE) ||
+ !(tmp_file->f_mode & FMODE_READ) ||
(tmp_file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_tmp_file;
@@ -422,11 +416,8 @@ xfs_swap_extents(
}
- IHOLD(ip);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
- IHOLD(tip);
- xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_log_inode(tp, ip, ilf_fields);
xfs_trans_log_inode(tp, tip, tilf_fields);
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 42520f04126..a1321bc7f19 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -25,13 +25,11 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
@@ -382,7 +380,7 @@ xfs_readdir(
int rval; /* return value */
int v; /* type-checking value */
- xfs_itrace_entry(dp);
+ trace_xfs_readdir(dp);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return XFS_ERROR(EIO);
@@ -549,7 +547,7 @@ xfs_dir2_grow_inode(
if ((error = xfs_bmapi(tp, dp, bno, count,
XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
args->firstblock, args->total, &map, &nmap,
- args->flist, NULL)))
+ args->flist)))
return error;
ASSERT(nmap <= 1);
if (nmap == 1) {
@@ -581,8 +579,7 @@ xfs_dir2_grow_inode(
if ((error = xfs_bmapi(tp, dp, b, c,
XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
args->firstblock, args->total,
- &mapp[mapi], &nmap, args->flist,
- NULL))) {
+ &mapp[mapi], &nmap, args->flist))) {
kmem_free(mapp);
return error;
}
@@ -715,7 +712,7 @@ xfs_dir2_shrink_inode(
*/
if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
- NULL, &done))) {
+ &done))) {
/*
* ENOSPC actually can happen if we're in a removename with
* no space reservation, and the resulting block removal
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 779a267b0a8..580d99cef9e 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -24,12 +24,10 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
@@ -1073,10 +1071,10 @@ xfs_dir2_sf_to_block(
*/
buf_len = dp->i_df.if_bytes;
- buf = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
+ buf = kmem_alloc(buf_len, KM_SLEEP);
- memcpy(buf, sfp, dp->i_df.if_bytes);
- xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
+ memcpy(buf, sfp, buf_len);
+ xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
dp->i_d.di_size = 0;
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
/*
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 498f8d69433..921595b84f5 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -24,12 +24,10 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_dir2_data.h"
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index e2d89854ec9..504be8640e9 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -25,11 +25,9 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
@@ -875,7 +873,7 @@ xfs_dir2_leaf_getdents(
xfs_dir2_byte_to_da(mp,
XFS_DIR2_LEAF_OFFSET) - map_off,
XFS_BMAPI_METADATA, NULL, 0,
- &map[map_valid], &nmap, NULL, NULL);
+ &map[map_valid], &nmap, NULL);
/*
* Don't know if we should ignore this or
* try to return an error.
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 78fc4d9ae75..f9a0864b696 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -24,12 +24,10 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index c1a5945d463..b1bae6b1eed 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -24,12 +24,10 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
deleted file mode 100644
index 2813cdd7237..00000000000
--- a/fs/xfs/xfs_dmapi.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DMAPI_H__
-#define __XFS_DMAPI_H__
-
-/* Values used to define the on-disk version of dm_attrname_t. All
- * on-disk attribute names start with the 8-byte string "SGI_DMI_".
- *
- * In the on-disk inode, DMAPI attribute names consist of the user-provided
- * name with the DMATTR_PREFIXSTRING pre-pended. This string must NEVER be
- * changed.
- */
-
-#define DMATTR_PREFIXLEN 8
-#define DMATTR_PREFIXSTRING "SGI_DMI_"
-
-typedef enum {
- DM_EVENT_INVALID = -1,
- DM_EVENT_CANCEL = 0, /* not supported */
- DM_EVENT_MOUNT = 1,
- DM_EVENT_PREUNMOUNT = 2,
- DM_EVENT_UNMOUNT = 3,
- DM_EVENT_DEBUT = 4, /* not supported */
- DM_EVENT_CREATE = 5,
- DM_EVENT_CLOSE = 6, /* not supported */
- DM_EVENT_POSTCREATE = 7,
- DM_EVENT_REMOVE = 8,
- DM_EVENT_POSTREMOVE = 9,
- DM_EVENT_RENAME = 10,
- DM_EVENT_POSTRENAME = 11,
- DM_EVENT_LINK = 12,
- DM_EVENT_POSTLINK = 13,
- DM_EVENT_SYMLINK = 14,
- DM_EVENT_POSTSYMLINK = 15,
- DM_EVENT_READ = 16,
- DM_EVENT_WRITE = 17,
- DM_EVENT_TRUNCATE = 18,
- DM_EVENT_ATTRIBUTE = 19,
- DM_EVENT_DESTROY = 20,
- DM_EVENT_NOSPACE = 21,
- DM_EVENT_USER = 22,
- DM_EVENT_MAX = 23
-} dm_eventtype_t;
-#define HAVE_DM_EVENTTYPE_T
-
-typedef enum {
- DM_RIGHT_NULL,
- DM_RIGHT_SHARED,
- DM_RIGHT_EXCL
-} dm_right_t;
-#define HAVE_DM_RIGHT_T
-
-/* Defines for determining if an event message should be sent. */
-#ifdef HAVE_DMAPI
-#define DM_EVENT_ENABLED(ip, event) ( \
- unlikely ((ip)->i_mount->m_flags & XFS_MOUNT_DMAPI) && \
- ( ((ip)->i_d.di_dmevmask & (1 << event)) || \
- ((ip)->i_mount->m_dmevmask & (1 << event)) ) \
- )
-#else
-#define DM_EVENT_ENABLED(ip, event) (0)
-#endif
-
-#define DM_XFS_VALID_FS_EVENTS ( \
- (1 << DM_EVENT_PREUNMOUNT) | \
- (1 << DM_EVENT_UNMOUNT) | \
- (1 << DM_EVENT_NOSPACE) | \
- (1 << DM_EVENT_DEBUT) | \
- (1 << DM_EVENT_CREATE) | \
- (1 << DM_EVENT_POSTCREATE) | \
- (1 << DM_EVENT_REMOVE) | \
- (1 << DM_EVENT_POSTREMOVE) | \
- (1 << DM_EVENT_RENAME) | \
- (1 << DM_EVENT_POSTRENAME) | \
- (1 << DM_EVENT_LINK) | \
- (1 << DM_EVENT_POSTLINK) | \
- (1 << DM_EVENT_SYMLINK) | \
- (1 << DM_EVENT_POSTSYMLINK) | \
- (1 << DM_EVENT_ATTRIBUTE) | \
- (1 << DM_EVENT_DESTROY) )
-
-/* Events valid in dm_set_eventlist() when called with a file handle for
- a regular file or a symlink. These events are persistent.
-*/
-
-#define DM_XFS_VALID_FILE_EVENTS ( \
- (1 << DM_EVENT_ATTRIBUTE) | \
- (1 << DM_EVENT_DESTROY) )
-
-/* Events valid in dm_set_eventlist() when called with a file handle for
- a directory. These events are persistent.
-*/
-
-#define DM_XFS_VALID_DIRECTORY_EVENTS ( \
- (1 << DM_EVENT_CREATE) | \
- (1 << DM_EVENT_POSTCREATE) | \
- (1 << DM_EVENT_REMOVE) | \
- (1 << DM_EVENT_POSTREMOVE) | \
- (1 << DM_EVENT_RENAME) | \
- (1 << DM_EVENT_POSTRENAME) | \
- (1 << DM_EVENT_LINK) | \
- (1 << DM_EVENT_POSTLINK) | \
- (1 << DM_EVENT_SYMLINK) | \
- (1 << DM_EVENT_POSTSYMLINK) | \
- (1 << DM_EVENT_ATTRIBUTE) | \
- (1 << DM_EVENT_DESTROY) )
-
-/* Events supported by the XFS filesystem. */
-#define DM_XFS_SUPPORTED_EVENTS ( \
- (1 << DM_EVENT_MOUNT) | \
- (1 << DM_EVENT_PREUNMOUNT) | \
- (1 << DM_EVENT_UNMOUNT) | \
- (1 << DM_EVENT_NOSPACE) | \
- (1 << DM_EVENT_CREATE) | \
- (1 << DM_EVENT_POSTCREATE) | \
- (1 << DM_EVENT_REMOVE) | \
- (1 << DM_EVENT_POSTREMOVE) | \
- (1 << DM_EVENT_RENAME) | \
- (1 << DM_EVENT_POSTRENAME) | \
- (1 << DM_EVENT_LINK) | \
- (1 << DM_EVENT_POSTLINK) | \
- (1 << DM_EVENT_SYMLINK) | \
- (1 << DM_EVENT_POSTSYMLINK) | \
- (1 << DM_EVENT_READ) | \
- (1 << DM_EVENT_WRITE) | \
- (1 << DM_EVENT_TRUNCATE) | \
- (1 << DM_EVENT_ATTRIBUTE) | \
- (1 << DM_EVENT_DESTROY) )
-
-
-/*
- * Definitions used for the flags field on dm_send_*_event().
- */
-
-#define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */
-#define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */
-#define DM_FLAGS_IMUX 0x004 /* thread holds i_mutex */
-#define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */
-#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */
-
-/*
- * Pull in platform specific event flags defines
- */
-#include "xfs_dmapi_priv.h"
-
-/*
- * Macros to turn caller specified delay/block flags into
- * dm_send_xxxx_event flag DM_FLAGS_NDELAY.
- */
-
-#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
- DM_FLAGS_NDELAY : 0)
-#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
-
-#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
deleted file mode 100644
index e71e2581c0c..00000000000
--- a/fs/xfs/xfs_dmops.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_dmapi.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-
-
-static struct xfs_dmops xfs_dmcore_stub = {
- .xfs_send_data = (xfs_send_data_t)fs_nosys,
- .xfs_send_mmap = (xfs_send_mmap_t)fs_noerr,
- .xfs_send_destroy = (xfs_send_destroy_t)fs_nosys,
- .xfs_send_namesp = (xfs_send_namesp_t)fs_nosys,
- .xfs_send_mount = (xfs_send_mount_t)fs_nosys,
- .xfs_send_unmount = (xfs_send_unmount_t)fs_noerr,
-};
-
-int
-xfs_dmops_get(struct xfs_mount *mp)
-{
- if (mp->m_flags & XFS_MOUNT_DMAPI) {
- cmn_err(CE_WARN,
- "XFS: dmapi support not available in this kernel.");
- return EINVAL;
- }
-
- mp->m_dm_ops = &xfs_dmcore_stub;
- return 0;
-}
-
-void
-xfs_dmops_put(struct xfs_mount *mp)
-{
-}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 047b8a8e5c2..ed999026766 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -23,12 +23,8 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_utils.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 409fe81585f..a55e687bf56 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -24,7 +24,6 @@
#include "xfs_buf_item.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_trans_priv.h"
#include "xfs_extfree_item.h"
@@ -33,18 +32,19 @@
kmem_zone_t *xfs_efi_zone;
kmem_zone_t *xfs_efd_zone;
-STATIC void xfs_efi_item_unlock(xfs_efi_log_item_t *);
+static inline struct xfs_efi_log_item *EFI_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_efi_log_item, efi_item);
+}
void
-xfs_efi_item_free(xfs_efi_log_item_t *efip)
+xfs_efi_item_free(
+ struct xfs_efi_log_item *efip)
{
- int nexts = efip->efi_format.efi_nextents;
-
- if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
+ if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS)
kmem_free(efip);
- } else {
+ else
kmem_zone_free(xfs_efi_zone, efip);
- }
}
/*
@@ -52,9 +52,9 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
* We only need 1 iovec for an efi item. It just logs the efi_log_format
* structure.
*/
-/*ARGSUSED*/
STATIC uint
-xfs_efi_item_size(xfs_efi_log_item_t *efip)
+xfs_efi_item_size(
+ struct xfs_log_item *lip)
{
return 1;
}
@@ -67,10 +67,12 @@ xfs_efi_item_size(xfs_efi_log_item_t *efip)
* slots in the efi item have been filled.
*/
STATIC void
-xfs_efi_item_format(xfs_efi_log_item_t *efip,
- xfs_log_iovec_t *log_vector)
+xfs_efi_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_iovec *log_vector)
{
- uint size;
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+ uint size;
ASSERT(efip->efi_next_extent == efip->efi_format.efi_nextents);
@@ -80,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
efip->efi_format.efi_size = 1;
- log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
+ log_vector->i_addr = &efip->efi_format;
log_vector->i_len = size;
log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
ASSERT(size >= sizeof(xfs_efi_log_format_t));
@@ -90,60 +92,33 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip,
/*
* Pinning has no meaning for an efi item, so just return.
*/
-/*ARGSUSED*/
STATIC void
-xfs_efi_item_pin(xfs_efi_log_item_t *efip)
+xfs_efi_item_pin(
+ struct xfs_log_item *lip)
{
- return;
}
-
/*
* While EFIs cannot really be pinned, the unpin operation is the
* last place at which the EFI is manipulated during a transaction.
* Here we coordinate with xfs_efi_cancel() to determine who gets to
* free the EFI.
*/
-/*ARGSUSED*/
-STATIC void
-xfs_efi_item_unpin(xfs_efi_log_item_t *efip)
-{
- struct xfs_ail *ailp = efip->efi_item.li_ailp;
-
- spin_lock(&ailp->xa_lock);
- if (efip->efi_flags & XFS_EFI_CANCELED) {
- /* xfs_trans_ail_delete() drops the AIL lock. */
- xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
- xfs_efi_item_free(efip);
- } else {
- efip->efi_flags |= XFS_EFI_COMMITTED;
- spin_unlock(&ailp->xa_lock);
- }
-}
-
-/*
- * like unpin only we have to also clear the xaction descriptor
- * pointing the log item if we free the item. This routine duplicates
- * unpin because efi_flags is protected by the AIL lock. Freeing
- * the descriptor and then calling unpin would force us to drop the AIL
- * lock which would open up a race condition.
- */
STATIC void
-xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
+xfs_efi_item_unpin(
+ struct xfs_log_item *lip,
+ int remove)
{
- struct xfs_ail *ailp = efip->efi_item.li_ailp;
- xfs_log_item_desc_t *lidp;
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+ struct xfs_ail *ailp = lip->li_ailp;
spin_lock(&ailp->xa_lock);
if (efip->efi_flags & XFS_EFI_CANCELED) {
- /*
- * free the xaction descriptor pointing to this item
- */
- lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
- xfs_trans_free_item(tp, lidp);
+ if (remove)
+ xfs_trans_del_item(lip);
/* xfs_trans_ail_delete() drops the AIL lock. */
- xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
+ xfs_trans_ail_delete(ailp, lip);
xfs_efi_item_free(efip);
} else {
efip->efi_flags |= XFS_EFI_COMMITTED;
@@ -158,9 +133,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
* XFS_ITEM_PINNED so that the caller will eventually flush the log.
* This should help in getting the EFI out of the AIL.
*/
-/*ARGSUSED*/
STATIC uint
-xfs_efi_item_trylock(xfs_efi_log_item_t *efip)
+xfs_efi_item_trylock(
+ struct xfs_log_item *lip)
{
return XFS_ITEM_PINNED;
}
@@ -168,13 +143,12 @@ xfs_efi_item_trylock(xfs_efi_log_item_t *efip)
/*
* Efi items have no locking, so just return.
*/
-/*ARGSUSED*/
STATIC void
-xfs_efi_item_unlock(xfs_efi_log_item_t *efip)
+xfs_efi_item_unlock(
+ struct xfs_log_item *lip)
{
- if (efip->efi_item.li_flags & XFS_LI_ABORTED)
- xfs_efi_item_free(efip);
- return;
+ if (lip->li_flags & XFS_LI_ABORTED)
+ xfs_efi_item_free(EFI_ITEM(lip));
}
/*
@@ -183,9 +157,10 @@ xfs_efi_item_unlock(xfs_efi_log_item_t *efip)
* flag is not paid any attention here. Checking for that is delayed
* until the EFI is unpinned.
*/
-/*ARGSUSED*/
STATIC xfs_lsn_t
-xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
+xfs_efi_item_committed(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
{
return lsn;
}
@@ -195,11 +170,10 @@ xfs_efi_item_committed(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
* stuck waiting for all of its corresponding efd items to be
* committed to disk.
*/
-/*ARGSUSED*/
STATIC void
-xfs_efi_item_push(xfs_efi_log_item_t *efip)
+xfs_efi_item_push(
+ struct xfs_log_item *lip)
{
- return;
}
/*
@@ -209,61 +183,55 @@ xfs_efi_item_push(xfs_efi_log_item_t *efip)
* example, for inodes, the inode is locked throughout the extent freeing
* so the dependency should be recorded there.
*/
-/*ARGSUSED*/
STATIC void
-xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
+xfs_efi_item_committing(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
{
- return;
}
/*
* This is the ops vector shared by all efi log items.
*/
static struct xfs_item_ops xfs_efi_item_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_efi_item_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin,
- .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
- xfs_efi_item_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efi_item_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_efi_item_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_efi_item_push,
- .iop_pushbuf = NULL,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_efi_item_committing
+ .iop_size = xfs_efi_item_size,
+ .iop_format = xfs_efi_item_format,
+ .iop_pin = xfs_efi_item_pin,
+ .iop_unpin = xfs_efi_item_unpin,
+ .iop_trylock = xfs_efi_item_trylock,
+ .iop_unlock = xfs_efi_item_unlock,
+ .iop_committed = xfs_efi_item_committed,
+ .iop_push = xfs_efi_item_push,
+ .iop_committing = xfs_efi_item_committing
};
/*
* Allocate and initialize an efi item with the given number of extents.
*/
-xfs_efi_log_item_t *
-xfs_efi_init(xfs_mount_t *mp,
- uint nextents)
+struct xfs_efi_log_item *
+xfs_efi_init(
+ struct xfs_mount *mp,
+ uint nextents)
{
- xfs_efi_log_item_t *efip;
+ struct xfs_efi_log_item *efip;
uint size;
ASSERT(nextents > 0);
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
size = (uint)(sizeof(xfs_efi_log_item_t) +
((nextents - 1) * sizeof(xfs_extent_t)));
- efip = (xfs_efi_log_item_t*)kmem_zalloc(size, KM_SLEEP);
+ efip = kmem_zalloc(size, KM_SLEEP);
} else {
- efip = (xfs_efi_log_item_t*)kmem_zone_zalloc(xfs_efi_zone,
- KM_SLEEP);
+ efip = kmem_zone_zalloc(xfs_efi_zone, KM_SLEEP);
}
xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents;
efip->efi_format.efi_id = (__psint_t)(void*)efip;
- return (efip);
+ return efip;
}
/*
@@ -276,7 +244,7 @@ xfs_efi_init(xfs_mount_t *mp,
int
xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
{
- xfs_efi_log_format_t *src_efi_fmt = (xfs_efi_log_format_t *)buf->i_addr;
+ xfs_efi_log_format_t *src_efi_fmt = buf->i_addr;
uint i;
uint len = sizeof(xfs_efi_log_format_t) +
(src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t);
@@ -289,8 +257,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len);
return 0;
} else if (buf->i_len == len32) {
- xfs_efi_log_format_32_t *src_efi_fmt_32 =
- (xfs_efi_log_format_32_t *)buf->i_addr;
+ xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr;
dst_efi_fmt->efi_type = src_efi_fmt_32->efi_type;
dst_efi_fmt->efi_size = src_efi_fmt_32->efi_size;
@@ -304,8 +271,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
return 0;
} else if (buf->i_len == len64) {
- xfs_efi_log_format_64_t *src_efi_fmt_64 =
- (xfs_efi_log_format_64_t *)buf->i_addr;
+ xfs_efi_log_format_64_t *src_efi_fmt_64 = buf->i_addr;
dst_efi_fmt->efi_type = src_efi_fmt_64->efi_type;
dst_efi_fmt->efi_size = src_efi_fmt_64->efi_size;
@@ -356,16 +322,18 @@ xfs_efi_release(xfs_efi_log_item_t *efip,
}
}
-STATIC void
-xfs_efd_item_free(xfs_efd_log_item_t *efdp)
+static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
{
- int nexts = efdp->efd_format.efd_nextents;
+ return container_of(lip, struct xfs_efd_log_item, efd_item);
+}
- if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
+STATIC void
+xfs_efd_item_free(struct xfs_efd_log_item *efdp)
+{
+ if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS)
kmem_free(efdp);
- } else {
+ else
kmem_zone_free(xfs_efd_zone, efdp);
- }
}
/*
@@ -373,9 +341,9 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
* We only need 1 iovec for an efd item. It just logs the efd_log_format
* structure.
*/
-/*ARGSUSED*/
STATIC uint
-xfs_efd_item_size(xfs_efd_log_item_t *efdp)
+xfs_efd_item_size(
+ struct xfs_log_item *lip)
{
return 1;
}
@@ -388,10 +356,12 @@ xfs_efd_item_size(xfs_efd_log_item_t *efdp)
* slots in the efd item have been filled.
*/
STATIC void
-xfs_efd_item_format(xfs_efd_log_item_t *efdp,
- xfs_log_iovec_t *log_vector)
+xfs_efd_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_iovec *log_vector)
{
- uint size;
+ struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+ uint size;
ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
@@ -401,48 +371,38 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp,
size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
efdp->efd_format.efd_size = 1;
- log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format);
+ log_vector->i_addr = &efdp->efd_format;
log_vector->i_len = size;
log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
ASSERT(size >= sizeof(xfs_efd_log_format_t));
}
-
/*
* Pinning has no meaning for an efd item, so just return.
*/
-/*ARGSUSED*/
STATIC void
-xfs_efd_item_pin(xfs_efd_log_item_t *efdp)
+xfs_efd_item_pin(
+ struct xfs_log_item *lip)
{
- return;
}
-
/*
* Since pinning has no meaning for an efd item, unpinning does
* not either.
*/
-/*ARGSUSED*/
-STATIC void
-xfs_efd_item_unpin(xfs_efd_log_item_t *efdp)
-{
- return;
-}
-
-/*ARGSUSED*/
STATIC void
-xfs_efd_item_unpin_remove(xfs_efd_log_item_t *efdp, xfs_trans_t *tp)
+xfs_efd_item_unpin(
+ struct xfs_log_item *lip,
+ int remove)
{
- return;
}
/*
* Efd items have no locking, so just return success.
*/
-/*ARGSUSED*/
STATIC uint
-xfs_efd_item_trylock(xfs_efd_log_item_t *efdp)
+xfs_efd_item_trylock(
+ struct xfs_log_item *lip)
{
return XFS_ITEM_LOCKED;
}
@@ -451,13 +411,12 @@ xfs_efd_item_trylock(xfs_efd_log_item_t *efdp)
* Efd items have no locking or pushing, so return failure
* so that the caller doesn't bother with us.
*/
-/*ARGSUSED*/
STATIC void
-xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
+xfs_efd_item_unlock(
+ struct xfs_log_item *lip)
{
- if (efdp->efd_item.li_flags & XFS_LI_ABORTED)
- xfs_efd_item_free(efdp);
- return;
+ if (lip->li_flags & XFS_LI_ABORTED)
+ xfs_efd_item_free(EFD_ITEM(lip));
}
/*
@@ -467,15 +426,18 @@ xfs_efd_item_unlock(xfs_efd_log_item_t *efdp)
* return -1 to keep the transaction code from further referencing
* this item.
*/
-/*ARGSUSED*/
STATIC xfs_lsn_t
-xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
+xfs_efd_item_committed(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
{
+ struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
/*
* If we got a log I/O error, it's always the case that the LR with the
* EFI got unpinned and freed before the EFD got aborted.
*/
- if ((efdp->efd_item.li_flags & XFS_LI_ABORTED) == 0)
+ if (!(lip->li_flags & XFS_LI_ABORTED))
xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents);
xfs_efd_item_free(efdp);
@@ -486,11 +448,10 @@ xfs_efd_item_committed(xfs_efd_log_item_t *efdp, xfs_lsn_t lsn)
* There isn't much you can do to push on an efd item. It is simply
* stuck waiting for the log to be flushed to disk.
*/
-/*ARGSUSED*/
STATIC void
-xfs_efd_item_push(xfs_efd_log_item_t *efdp)
+xfs_efd_item_push(
+ struct xfs_log_item *lip)
{
- return;
}
/*
@@ -500,55 +461,48 @@ xfs_efd_item_push(xfs_efd_log_item_t *efdp)
* example, for inodes, the inode is locked throughout the extent freeing
* so the dependency should be recorded there.
*/
-/*ARGSUSED*/
STATIC void
-xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn)
+xfs_efd_item_committing(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
{
- return;
}
/*
* This is the ops vector shared by all efd log items.
*/
static struct xfs_item_ops xfs_efd_item_ops = {
- .iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size,
- .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
- xfs_efd_item_format,
- .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin,
- .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin,
- .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
- xfs_efd_item_unpin_remove,
- .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock,
- .iop_unlock = (void(*)(xfs_log_item_t*))xfs_efd_item_unlock,
- .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_efd_item_committed,
- .iop_push = (void(*)(xfs_log_item_t*))xfs_efd_item_push,
- .iop_pushbuf = NULL,
- .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
- xfs_efd_item_committing
+ .iop_size = xfs_efd_item_size,
+ .iop_format = xfs_efd_item_format,
+ .iop_pin = xfs_efd_item_pin,
+ .iop_unpin = xfs_efd_item_unpin,
+ .iop_trylock = xfs_efd_item_trylock,
+ .iop_unlock = xfs_efd_item_unlock,
+ .iop_committed = xfs_efd_item_committed,
+ .iop_push = xfs_efd_item_push,
+ .iop_committing = xfs_efd_item_committing
};
-
/*
* Allocate and initialize an efd item with the given number of extents.
*/
-xfs_efd_log_item_t *
-xfs_efd_init(xfs_mount_t *mp,
- xfs_efi_log_item_t *efip,
- uint nextents)
+struct xfs_efd_log_item *
+xfs_efd_init(
+ struct xfs_mount *mp,
+ struct xfs_efi_log_item *efip,
+ uint nextents)
{
- xfs_efd_log_item_t *efdp;
+ struct xfs_efd_log_item *efdp;
uint size;
ASSERT(nextents > 0);
if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
size = (uint)(sizeof(xfs_efd_log_item_t) +
((nextents - 1) * sizeof(xfs_extent_t)));
- efdp = (xfs_efd_log_item_t*)kmem_zalloc(size, KM_SLEEP);
+ efdp = kmem_zalloc(size, KM_SLEEP);
} else {
- efdp = (xfs_efd_log_item_t*)kmem_zone_zalloc(xfs_efd_zone,
- KM_SLEEP);
+ efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
}
xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
@@ -556,5 +510,5 @@ xfs_efd_init(xfs_mount_t *mp,
efdp->efd_format.efd_nextents = nextents;
efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
- return (efdp);
+ return efdp;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 390850ee660..9b715dce569 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -18,13 +18,9 @@
#include "xfs.h"
#include "xfs_bmap_btree.h"
#include "xfs_inum.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_ag.h"
-#include "xfs_dmapi.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
@@ -127,6 +123,82 @@ typedef struct fstrm_item
xfs_inode_t *pip; /* Parent directory inode pointer. */
} fstrm_item_t;
+/*
+ * Allocation group filestream associations are tracked with per-ag atomic
+ * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
+ * particular AG already has active filestreams associated with it. The mount
+ * point's m_peraglock is used to protect these counters from per-ag array
+ * re-allocation during a growfs operation. When xfs_growfs_data_private() is
+ * about to reallocate the array, it calls xfs_filestream_flush() with the
+ * m_peraglock held in write mode.
+ *
+ * Since xfs_mru_cache_flush() guarantees that all the free functions for all
+ * the cache elements have finished executing before it returns, it's safe for
+ * the free functions to use the atomic counters without m_peraglock protection.
+ * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
+ * whether it was called with the m_peraglock held in read mode, write mode or
+ * not held at all. The race condition this addresses is the following:
+ *
+ * - The work queue scheduler fires and pulls a filestream directory cache
+ * element off the LRU end of the cache for deletion, then gets pre-empted.
+ * - A growfs operation grabs the m_peraglock in write mode, flushes all the
+ * remaining items from the cache and reallocates the mount point's per-ag
+ * array, resetting all the counters to zero.
+ * - The work queue thread resumes and calls the free function for the element
+ * it started cleaning up earlier. In the process it decrements the
+ * filestreams counter for an AG that now has no references.
+ *
+ * With a shrinkfs feature, the above scenario could panic the system.
+ *
+ * All other uses of the following macros should be protected by either the
+ * m_peraglock held in read mode, or the cache's internal locking exposed by the
+ * interval between a call to xfs_mru_cache_lookup() and a call to
+ * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
+ * when new elements are added to the cache.
+ *
+ * Combined, these locking rules ensure that no associations will ever exist in
+ * the cache that reference per-ag array elements that have since been
+ * reallocated.
+ */
+static int
+xfs_filestream_peek_ag(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+ int ret;
+
+ pag = xfs_perag_get(mp, agno);
+ ret = atomic_read(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+ return ret;
+}
+
+static int
+xfs_filestream_get_ag(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+ int ret;
+
+ pag = xfs_perag_get(mp, agno);
+ ret = atomic_inc_return(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+ return ret;
+}
+
+static void
+xfs_filestream_put_ag(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, agno);
+ atomic_dec(&pag->pagf_fstrms);
+ xfs_perag_put(pag);
+}
/*
* Scan the AGs starting at startag looking for an AG that isn't in use and has
@@ -355,16 +427,14 @@ xfs_fstrm_free_func(
{
fstrm_item_t *item = (fstrm_item_t *)data;
xfs_inode_t *ip = item->ip;
- int ref;
ASSERT(ip->i_ino == ino);
xfs_iflags_clear(ip, XFS_IFILESTREAM);
/* Drop the reference taken on the AG when the item was added. */
- ref = xfs_filestream_put_ag(ip->i_mount, item->ag);
+ xfs_filestream_put_ag(ip->i_mount, item->ag);
- ASSERT(ref >= 0);
TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
xfs_filestream_peek_ag(ip->i_mount, item->ag));
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 260f757bbc5..09dd9af4543 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -42,88 +42,6 @@ extern ktrace_t *xfs_filestreams_trace_buf;
#endif
-/*
- * Allocation group filestream associations are tracked with per-ag atomic
- * counters. These counters allow _xfs_filestream_pick_ag() to tell whether a
- * particular AG already has active filestreams associated with it. The mount
- * point's m_peraglock is used to protect these counters from per-ag array
- * re-allocation during a growfs operation. When xfs_growfs_data_private() is
- * about to reallocate the array, it calls xfs_filestream_flush() with the
- * m_peraglock held in write mode.
- *
- * Since xfs_mru_cache_flush() guarantees that all the free functions for all
- * the cache elements have finished executing before it returns, it's safe for
- * the free functions to use the atomic counters without m_peraglock protection.
- * This allows the implementation of xfs_fstrm_free_func() to be agnostic about
- * whether it was called with the m_peraglock held in read mode, write mode or
- * not held at all. The race condition this addresses is the following:
- *
- * - The work queue scheduler fires and pulls a filestream directory cache
- * element off the LRU end of the cache for deletion, then gets pre-empted.
- * - A growfs operation grabs the m_peraglock in write mode, flushes all the
- * remaining items from the cache and reallocates the mount point's per-ag
- * array, resetting all the counters to zero.
- * - The work queue thread resumes and calls the free function for the element
- * it started cleaning up earlier. In the process it decrements the
- * filestreams counter for an AG that now has no references.
- *
- * With a shrinkfs feature, the above scenario could panic the system.
- *
- * All other uses of the following macros should be protected by either the
- * m_peraglock held in read mode, or the cache's internal locking exposed by the
- * interval between a call to xfs_mru_cache_lookup() and a call to
- * xfs_mru_cache_done(). In addition, the m_peraglock must be held in read mode
- * when new elements are added to the cache.
- *
- * Combined, these locking rules ensure that no associations will ever exist in
- * the cache that reference per-ag array elements that have since been
- * reallocated.
- */
-/*
- * xfs_filestream_peek_ag is only used in tracing code
- */
-static inline int
-xfs_filestream_peek_ag(
- xfs_mount_t *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_perag *pag;
- int ret;
-
- pag = xfs_perag_get(mp, agno);
- ret = atomic_read(&pag->pagf_fstrms);
- xfs_perag_put(pag);
- return ret;
-}
-
-static inline int
-xfs_filestream_get_ag(
- xfs_mount_t *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_perag *pag;
- int ret;
-
- pag = xfs_perag_get(mp, agno);
- ret = atomic_inc_return(&pag->pagf_fstrms);
- xfs_perag_put(pag);
- return ret;
-}
-
-static inline int
-xfs_filestream_put_ag(
- xfs_mount_t *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_perag *pag;
- int ret;
-
- pag = xfs_perag_get(mp, agno);
- ret = atomic_dec_return(&pag->pagf_fstrms);
- xfs_perag_put(pag);
- return ret;
-}
-
/* allocation selection flags */
typedef enum xfs_fstrm_alloc {
XFS_PICK_USERDATA = 1,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 37a6f62c57b..dbca5f5c37b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -24,14 +24,10 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
@@ -626,8 +622,7 @@ xfs_fs_log_dummy(
ip = mp->m_rootip;
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, ip);
+ xfs_trans_ijoin(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb..abf80ae1e95 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -24,14 +24,10 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
@@ -1203,6 +1199,63 @@ error0:
return error;
}
+STATIC int
+xfs_imap_lookup(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino,
+ xfs_agblock_t agbno,
+ xfs_agblock_t *chunk_agbno,
+ xfs_agblock_t *offset_agbno,
+ int flags)
+{
+ struct xfs_inobt_rec_incore rec;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp;
+ xfs_agino_t startino;
+ int error;
+ int i;
+
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ if (error) {
+ xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
+ "xfs_ialloc_read_agi() returned "
+ "error %d, agno %d",
+ error, agno);
+ return error;
+ }
+
+ /*
+ * derive and lookup the exact inode record for the given agino. If the
+ * record cannot be found, then it's an invalid inode number and we
+ * should abort.
+ */
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+ startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
+ error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+ if (!error) {
+ if (i)
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (!error && i == 0)
+ error = EINVAL;
+ }
+
+ xfs_trans_brelse(tp, agbp);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ if (error)
+ return error;
+
+ /* for untrusted inodes check it is allocated first */
+ if ((flags & XFS_IGET_UNTRUSTED) &&
+ (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
+ return EINVAL;
+
+ *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
+ *offset_agbno = agbno - *chunk_agbno;
+ return 0;
+}
+
/*
* Return the location of the inode in imap, for mapping it into a buffer.
*/
@@ -1235,8 +1288,11 @@ xfs_imap(
if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
#ifdef DEBUG
- /* no diagnostics for bulkstat, ino comes from userspace */
- if (flags & XFS_IGET_BULKSTAT)
+ /*
+ * Don't output diagnostic information for untrusted inodes
+ * as they can be invalid without implying corruption.
+ */
+ if (flags & XFS_IGET_UNTRUSTED)
return XFS_ERROR(EINVAL);
if (agno >= mp->m_sb.sb_agcount) {
xfs_fs_cmn_err(CE_ALERT, mp,
@@ -1263,6 +1319,23 @@ xfs_imap(
return XFS_ERROR(EINVAL);
}
+ blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+
+ /*
+ * For bulkstat and handle lookups, we have an untrusted inode number
+ * that we have to verify is valid. We cannot do this just by reading
+ * the inode buffer as it may have been unlinked and removed leaving
+ * inodes in stale state on disk. Hence we have to do a btree lookup
+ * in all cases where an untrusted inode number is passed.
+ */
+ if (flags & XFS_IGET_UNTRUSTED) {
+ error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+ &chunk_agbno, &offset_agbno, flags);
+ if (error)
+ return error;
+ goto out_map;
+ }
+
/*
* If the inode cluster size is the same as the blocksize or
* smaller we get to the buffer by simple arithmetics.
@@ -1277,24 +1350,6 @@ xfs_imap(
return 0;
}
- blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
-
- /*
- * If we get a block number passed from bulkstat we can use it to
- * find the buffer easily.
- */
- if (imap->im_blkno) {
- offset = XFS_INO_TO_OFFSET(mp, ino);
- ASSERT(offset < mp->m_sb.sb_inopblock);
-
- cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
- offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
-
- imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
- imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
- return 0;
- }
-
/*
* If the inode chunks are aligned then use simple maths to
* find the location. Otherwise we have to do a btree
@@ -1304,50 +1359,13 @@ xfs_imap(
offset_agbno = agbno & mp->m_inoalign_mask;
chunk_agbno = agbno - offset_agbno;
} else {
- xfs_btree_cur_t *cur; /* inode btree cursor */
- xfs_inobt_rec_incore_t chunk_rec;
- xfs_buf_t *agbp; /* agi buffer */
- int i; /* temp state */
-
- error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_ialloc_read_agi() returned "
- "error %d, agno %d",
- error, agno);
- return error;
- }
-
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_lookup() failed");
- goto error0;
- }
-
- error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_get_rec() failed");
- goto error0;
- }
- if (i == 0) {
-#ifdef DEBUG
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_get_rec() failed");
-#endif /* DEBUG */
- error = XFS_ERROR(EINVAL);
- }
- error0:
- xfs_trans_brelse(tp, agbp);
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+ &chunk_agbno, &offset_agbno, flags);
if (error)
return error;
- chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
- offset_agbno = agbno - chunk_agbno;
}
+out_map:
ASSERT(agbno >= chunk_agbno);
cluster_agbno = chunk_agbno +
((offset_agbno / blks_per_cluster) * blks_per_cluster);
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c282a9af539..d352862cefa 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -24,14 +24,10 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 75df75f43d4..b1ecc6f97ad 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -25,14 +25,10 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
@@ -95,7 +91,7 @@ xfs_inode_alloc(
return ip;
}
-STATIC void
+void
xfs_inode_free(
struct xfs_inode *ip)
{
@@ -212,7 +208,7 @@ xfs_iget_cache_hit(
ip->i_flags &= ~XFS_INEW;
ip->i_flags |= XFS_IRECLAIMABLE;
__xfs_inode_set_reclaim_tag(pag, ip);
- trace_xfs_iget_reclaim(ip);
+ trace_xfs_iget_reclaim_fail(ip);
goto out_error;
}
@@ -227,6 +223,7 @@ xfs_iget_cache_hit(
} else {
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
+ trace_xfs_iget_skip(ip);
error = EAGAIN;
goto out_error;
}
@@ -234,6 +231,7 @@ xfs_iget_cache_hit(
/* We've got a live one. */
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
+ trace_xfs_iget_hit(ip);
}
if (lock_flags != 0)
@@ -242,7 +240,6 @@ xfs_iget_cache_hit(
xfs_iflags_clear(ip, XFS_ISTALE);
XFS_STATS_INC(xs_ig_found);
- trace_xfs_iget_found(ip);
return 0;
out_error:
@@ -259,24 +256,22 @@ xfs_iget_cache_miss(
xfs_trans_t *tp,
xfs_ino_t ino,
struct xfs_inode **ipp,
- xfs_daddr_t bno,
int flags,
int lock_flags)
{
struct xfs_inode *ip;
int error;
- unsigned long first_index, mask;
xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
ip = xfs_inode_alloc(mp, ino);
if (!ip)
return ENOMEM;
- error = xfs_iread(mp, tp, ip, bno, flags);
+ error = xfs_iread(mp, tp, ip, flags);
if (error)
goto out_destroy;
- xfs_itrace_entry(ip);
+ trace_xfs_iget_miss(ip);
if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
error = ENOENT;
@@ -302,8 +297,6 @@ xfs_iget_cache_miss(
BUG();
}
- mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
- first_index = agino & mask;
write_lock(&pag->pag_ici_lock);
/* insert the new inode */
@@ -322,7 +315,6 @@ xfs_iget_cache_miss(
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
- trace_xfs_iget_alloc(ip);
*ipp = ip;
return 0;
@@ -358,8 +350,6 @@ out_destroy:
* within the file system for the inode being requested.
* lock_flags -- flags indicating how to lock the inode. See the comment
* for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- * if known (as by bulkstat), else 0.
*/
int
xfs_iget(
@@ -368,8 +358,7 @@ xfs_iget(
xfs_ino_t ino,
uint flags,
uint lock_flags,
- xfs_inode_t **ipp,
- xfs_daddr_t bno)
+ xfs_inode_t **ipp)
{
xfs_inode_t *ip;
int error;
@@ -397,7 +386,7 @@ again:
read_unlock(&pag->pag_ici_lock);
XFS_STATS_INC(xs_ig_missed);
- error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+ error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
flags, lock_flags);
if (error)
goto out_error_or_again;
@@ -426,97 +415,6 @@ out_error_or_again:
}
/*
- * Decrement reference count of an inode structure and unlock it.
- *
- * ip -- the inode being released
- * lock_flags -- this parameter indicates the inode's locks to be
- * to be released. See the comment on xfs_iunlock() for a list
- * of valid values.
- */
-void
-xfs_iput(xfs_inode_t *ip,
- uint lock_flags)
-{
- xfs_itrace_entry(ip);
- xfs_iunlock(ip, lock_flags);
- IRELE(ip);
-}
-
-/*
- * Special iput for brand-new inodes that are still locked
- */
-void
-xfs_iput_new(
- xfs_inode_t *ip,
- uint lock_flags)
-{
- struct inode *inode = VFS_I(ip);
-
- xfs_itrace_entry(ip);
-
- if ((ip->i_d.di_mode == 0)) {
- ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
- make_bad_inode(inode);
- }
- if (inode->i_state & I_NEW)
- unlock_new_inode(inode);
- if (lock_flags)
- xfs_iunlock(ip, lock_flags);
- IRELE(ip);
-}
-
-/*
- * This is called free all the memory associated with an inode.
- * It must free the inode itself and any buffers allocated for
- * if_extents/if_data and if_broot. It must also free the lock
- * associated with the inode.
- *
- * Note: because we don't initialise everything on reallocation out
- * of the zone, we must ensure we nullify everything correctly before
- * freeing the structure.
- */
-void
-xfs_ireclaim(
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_perag *pag;
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
-
- XFS_STATS_INC(xs_ig_reclaims);
-
- /*
- * Remove the inode from the per-AG radix tree.
- *
- * Because radix_tree_delete won't complain even if the item was never
- * added to the tree assert that it's been there before to catch
- * problems with the inode life time early on.
- */