NFS: Fix a race in sillyrename
lookup() and sillyrename() can race one another because the sillyrename()
completion cannot take the parent directory's inode->i_mutex since the
latter may be held by whoever is calling dput().
We therefore have little option but to add extra locking to ensure that
nfs_lookup() and nfs_atomic_open() do not race with the sillyrename
completion.
If somebody has looked up the sillyrenamed file in the meantime, we just
transfer the sillydelete information to the new dentry.
Please refer to the bug-report at
http://bugzilla.linux-nfs.org/show_bug.cgi?id=150
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8ec7fbd..3533453 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -562,6 +562,7 @@
nfs_fattr_init(&fattr);
desc->entry = &my_entry;
+ nfs_block_sillyrename(dentry);
while(!desc->entry->eof) {
res = readdir_search_pagecache(desc);
@@ -592,6 +593,7 @@
break;
}
}
+ nfs_unblock_sillyrename(dentry);
unlock_kernel();
if (res > 0)
res = 0;
@@ -866,6 +868,7 @@
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
struct dentry *res;
+ struct dentry *parent;
struct inode *inode = NULL;
int error;
struct nfs_fh fhandle;
@@ -894,26 +897,31 @@
goto out_unlock;
}
+ parent = dentry->d_parent;
+ /* Protect against concurrent sillydeletes */
+ nfs_block_sillyrename(parent);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error == -ENOENT)
goto no_entry;
if (error < 0) {
res = ERR_PTR(error);
- goto out_unlock;
+ goto out_unblock_sillyrename;
}
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
res = (struct dentry *)inode;
if (IS_ERR(res))
- goto out_unlock;
+ goto out_unblock_sillyrename;
no_entry:
res = d_materialise_unique(dentry, inode);
if (res != NULL) {
if (IS_ERR(res))
- goto out_unlock;
+ goto out_unblock_sillyrename;
dentry = res;
}
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+out_unblock_sillyrename:
+ nfs_unblock_sillyrename(parent);
out_unlock:
unlock_kernel();
out:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6d2f2a3..173e294 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1169,6 +1169,9 @@
INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
nfsi->ncommit = 0;
nfsi->npages = 0;
+ atomic_set(&nfsi->silly_count, 1);
+ INIT_HLIST_HEAD(&nfsi->silly_list);
+ init_waitqueue_head(&nfsi->waitqueue);
nfs4_init_once(nfsi);
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cb99fd9..2cb3b8b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1372,6 +1372,7 @@
struct dentry *
nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
+ struct dentry *parent;
struct path path = {
.mnt = nd->mnt,
.dentry = dentry,
@@ -1394,6 +1395,9 @@
cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
if (IS_ERR(cred))
return (struct dentry *)cred;
+ parent = dentry->d_parent;
+ /* Protect against concurrent sillydeletes */
+ nfs_block_sillyrename(parent);
state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
put_rpccred(cred);
if (IS_ERR(state)) {
@@ -1401,12 +1405,14 @@
d_add(dentry, NULL);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
}
+ nfs_unblock_sillyrename(parent);
return (struct dentry *)state;
}
res = d_add_unique(dentry, igrab(state->inode));
if (res != NULL)
path.dentry = res;
nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
+ nfs_unblock_sillyrename(parent);
nfs4_intent_set_file(nd, &path, state);
return res;
}
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 1aed850..6ecd46c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -14,6 +14,7 @@
struct nfs_unlinkdata {
+ struct hlist_node list;
struct nfs_removeargs args;
struct nfs_removeres res;
struct inode *dir;
@@ -52,6 +53,20 @@
return 0;
}
+static void nfs_free_dname(struct nfs_unlinkdata *data)
+{
+ kfree(data->args.name.name);
+ data->args.name.name = NULL;
+ data->args.name.len = 0;
+}
+
+static void nfs_dec_sillycount(struct inode *dir)
+{
+ struct nfs_inode *nfsi = NFS_I(dir);
+ if (atomic_dec_return(&nfsi->silly_count) == 1)
+ wake_up(&nfsi->waitqueue);
+}
+
/**
* nfs_async_unlink_init - Initialize the RPC info
* task: rpc_task of the sillydelete
@@ -95,6 +110,8 @@
static void nfs_async_unlink_release(void *calldata)
{
struct nfs_unlinkdata *data = calldata;
+
+ nfs_dec_sillycount(data->dir);
nfs_free_unlinkdata(data);
}
@@ -104,24 +121,35 @@
.rpc_release = nfs_async_unlink_release,
};
-static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
+static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
{
struct rpc_task *task;
- struct dentry *parent;
- struct inode *dir;
+ struct dentry *alias;
- if (nfs_copy_dname(dentry, data) < 0)
- goto out_free;
-
- parent = dget_parent(dentry);
- if (parent == NULL)
- goto out_free;
- dir = igrab(parent->d_inode);
- dput(parent);
- if (dir == NULL)
- goto out_free;
-
- data->dir = dir;
+ alias = d_lookup(parent, &data->args.name);
+ if (alias != NULL) {
+ int ret = 0;
+ /*
+ * Hey, we raced with lookup... See if we need to transfer
+ * the sillyrename information to the aliased dentry.
+ */
+ nfs_free_dname(data);
+ spin_lock(&alias->d_lock);
+ if (!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+ alias->d_fsdata = data;
+ alias->d_flags ^= DCACHE_NFSFS_RENAMED;
+ ret = 1;
+ }
+ spin_unlock(&alias->d_lock);
+ nfs_dec_sillycount(dir);
+ dput(alias);
+ return ret;
+ }
+ data->dir = igrab(dir);
+ if (!data->dir) {
+ nfs_dec_sillycount(dir);
+ return 0;
+ }
data->args.fh = NFS_FH(dir);
nfs_fattr_init(&data->res.dir_attr);
@@ -129,8 +157,64 @@
if (!IS_ERR(task))
rpc_put_task(task);
return 1;
+}
+
+static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
+{
+ struct dentry *parent;
+ struct inode *dir;
+ int ret = 0;
+
+ /* Returns 1 if the delete was deferred, else nfs_do_call_unlink()'s result; 0 on early failure */
+ parent = dget_parent(dentry);
+ if (parent == NULL)
+ goto out_free;
+ dir = parent->d_inode;
+ if (nfs_copy_dname(dentry, data) != 0)
+ goto out_dput;
+ /* Non-exclusive lock protects against concurrent lookup() calls */
+ spin_lock(&dir->i_lock);
+ if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
+ /* Deferred delete */
+ hlist_add_head(&data->list, &NFS_I(dir)->silly_list);
+ spin_unlock(&dir->i_lock);
+ ret = 1;
+ goto out_dput;
+ }
+ spin_unlock(&dir->i_lock);
+ ret = nfs_do_call_unlink(parent, dir, data);
+out_dput:
+ dput(parent);
out_free:
- return 0;
+ return ret;
+}
+
+void nfs_block_sillyrename(struct dentry *dentry)
+{
+ struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
+
+ wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
+}
+
+void nfs_unblock_sillyrename(struct dentry *dentry)
+{
+ struct inode *dir = dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(dir);
+ struct nfs_unlinkdata *data;
+
+ atomic_inc(&nfsi->silly_count);
+ spin_lock(&dir->i_lock);
+ while (!hlist_empty(&nfsi->silly_list)) {
+ if (!atomic_inc_not_zero(&nfsi->silly_count))
+ break;
+ data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list);
+ hlist_del(&data->list);
+ spin_unlock(&dir->i_lock);
+ if (nfs_do_call_unlink(dentry, dir, data) == 0)
+ nfs_free_unlinkdata(data);
+ spin_lock(&dir->i_lock);
+ }
+ spin_unlock(&dir->i_lock);
}
/**
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c5164c2..e82a6eb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -160,6 +160,12 @@
/* Open contexts for shared mmap writes */
struct list_head open_files;
+ /* 1 when idle, 0 while lookup/readdir blocks sillydeletes, else 1 + in-flight sillydelete calls */
+ atomic_t silly_count;
+ /* List of deferred sillydelete requests */
+ struct hlist_head silly_list;
+ wait_queue_head_t waitqueue;
+
#ifdef CONFIG_NFS_V4
struct nfs4_cached_acl *nfs4_acl;
/* NFSv4 state */
@@ -394,6 +400,8 @@
*/
extern int nfs_async_unlink(struct inode *dir, struct dentry *dentry);
extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
+extern void nfs_block_sillyrename(struct dentry *dentry);
+extern void nfs_unblock_sillyrename(struct dentry *dentry);
/*
* linux/fs/nfs/write.c