fs: dcache scale dentry refcount
Make d_count non-atomic and protect it with d_lock. This allows us to ensure a
0 refcount dentry remains 0 without dcache_lock. It is also fairly natural when
we start protecting many other dentry members with d_lock.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 3532b92..29a406a 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -162,7 +162,7 @@
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (!(d_unhashed(dentry)) && dentry->d_inode) {
- dget_locked(dentry);
+ dget_locked_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
simple_unlink(dir->d_inode, dentry);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 8c8afc7..18aee04 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -280,7 +280,7 @@
spin_lock(&dcache_lock);
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_locked(tmp);
+ dget_locked_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
spin_unlock(&dcache_lock);
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f99bddc..fe4b242 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -456,7 +456,7 @@
spin_lock(&dcache_lock);
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_locked(tmp);
+ dget_locked_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
spin_unlock(&dcache_lock);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a796c94..413b564 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -198,7 +198,7 @@
else
ino_count++;
- if (atomic_read(&p->d_count) > ino_count) {
+ if (p->d_count > ino_count) {
top_ino->last_used = jiffies;
dput(p);
return 1;
@@ -347,7 +347,7 @@
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 2;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
/* Can we umount this guy */
@@ -369,7 +369,7 @@
if (!exp_leaves) {
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 1;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
@@ -383,7 +383,7 @@
} else {
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 1;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d34896c..7922509 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -436,7 +436,7 @@
spin_lock(&active->d_lock);
/* Already gone? */
- if (atomic_read(&active->d_count) == 0)
+ if (active->d_count == 0)
goto next;
qstr = &active->d_name;
@@ -452,7 +452,7 @@
goto next;
if (d_unhashed(active)) {
- dget(active);
+ dget_dlock(active);
spin_unlock(&active->d_lock);
spin_unlock(&sbi->lookup_lock);
spin_unlock(&dcache_lock);
@@ -507,7 +507,7 @@
goto next;
if (d_unhashed(expiring)) {
- dget(expiring);
+ dget_dlock(expiring);
spin_unlock(&expiring->d_lock);
spin_unlock(&sbi->lookup_lock);
spin_unlock(&dcache_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d902948..3ecf915 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -150,7 +150,9 @@
di = ceph_dentry(dentry);
}
- atomic_inc(&dentry->d_count);
+ spin_lock(&dentry->d_lock);
+ dentry->d_count++;
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bf12865..bb68c79 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -879,8 +879,8 @@
} else if (realdn) {
dout("dn %p (%d) spliced with %p (%d) "
"inode %p ino %llx.%llx\n",
- dn, atomic_read(&dn->d_count),
- realdn, atomic_read(&realdn->d_count),
+ dn, dn->d_count,
+ realdn, realdn->d_count,
realdn->d_inode, ceph_vinop(realdn->d_inode));
dput(dn);
dn = realdn;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 38800ea..a50fca1 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1486,7 +1486,7 @@
*base = ceph_ino(temp->d_inode);
*plen = len;
dout("build_path on %p %d built %llx '%.*s'\n",
- dentry, atomic_read(&dentry->d_count), *base, len, path);
+ dentry, dentry->d_count, *base, len, path);
return path;
}
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 4cce3b07..9e37e8b 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -559,7 +559,7 @@
if (cii->c_flags & C_FLUSH)
coda_flag_inode_children(inode, C_FLUSH);
- if (atomic_read(&de->d_count) > 1)
+ if (de->d_count > 1)
/* pretend it's valid, but don't change the flags */
goto out;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 20024a9..e9acea4 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -394,8 +394,7 @@
if (d->d_inode)
simple_rmdir(parent->d_inode,d);
- pr_debug(" o %s removing done (%d)\n",d->d_name.name,
- atomic_read(&d->d_count));
+ pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count);
dput(parent);
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 253476d..79b3776 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -253,7 +253,7 @@
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (!(d_unhashed(dentry) && dentry->d_inode)) {
- dget_locked(dentry);
+ dget_locked_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index 3d3c843..81e9150 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -45,6 +45,7 @@
* - d_flags
* - d_name
* - d_lru
+ * - d_count
*
* Ordering:
* dcache_lock
@@ -125,6 +126,7 @@
*/
static void d_free(struct dentry *dentry)
{
+ BUG_ON(dentry->d_count);
this_cpu_dec(nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
@@ -222,8 +224,11 @@
struct dentry *parent;
list_del(&dentry->d_u.d_child);
- /*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
+ /*
+ * dentry_iput drops the locks, at which point nobody (except
+ * transient RCU lookups) can reach this dentry.
+ */
if (IS_ROOT(dentry))
parent = NULL;
else
@@ -303,13 +308,23 @@
return;
repeat:
- if (atomic_read(&dentry->d_count) == 1)
+ if (dentry->d_count == 1)
might_sleep();
- if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
- return;
-
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
+ if (dentry->d_count == 1) {
+ if (!spin_trylock(&dcache_lock)) {
+ /*
+ * Something of a livelock possibility we could avoid
+ * by taking dcache_lock and trying again, but we
+ * want to reduce dcache_lock anyway so this will
+ * get improved.
+ */
+ spin_unlock(&dentry->d_lock);
+ goto repeat;
+ }
+ }
+ dentry->d_count--;
+ if (dentry->d_count) {
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return;
@@ -389,7 +404,7 @@
* working directory or similar).
*/
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) > 1) {
+ if (dentry->d_count > 1) {
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
@@ -404,29 +419,61 @@
}
EXPORT_SYMBOL(d_invalidate);
-/* This should be called _only_ with dcache_lock held */
+/* This must be called with dcache_lock and d_lock held */
static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
{
- atomic_inc(&dentry->d_count);
+ dentry->d_count++;
dentry_lru_del(dentry);
return dentry;
}
+/* This should be called _only_ with dcache_lock held */
static inline struct dentry * __dget_locked(struct dentry *dentry)
{
- atomic_inc(&dentry->d_count);
spin_lock(&dentry->d_lock);
- dentry_lru_del(dentry);
+ __dget_locked_dlock(dentry);
spin_unlock(&dentry->d_lock);
return dentry;
}
+struct dentry * dget_locked_dlock(struct dentry *dentry)
+{
+ return __dget_locked_dlock(dentry);
+}
+
struct dentry * dget_locked(struct dentry *dentry)
{
return __dget_locked(dentry);
}
EXPORT_SYMBOL(dget_locked);
+struct dentry *dget_parent(struct dentry *dentry)
+{
+ struct dentry *ret;
+
+repeat:
+ spin_lock(&dentry->d_lock);
+ ret = dentry->d_parent;
+ if (!ret)
+ goto out;
+ if (dentry == ret) {
+ ret->d_count++;
+ goto out;
+ }
+ if (!spin_trylock(&ret->d_lock)) {
+ spin_unlock(&dentry->d_lock);
+ cpu_relax();
+ goto repeat;
+ }
+ BUG_ON(!ret->d_count);
+ ret->d_count++;
+ spin_unlock(&ret->d_lock);
+out:
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
+EXPORT_SYMBOL(dget_parent);
+
/**
* d_find_alias - grab a hashed alias of inode
* @inode: inode in question
@@ -495,7 +542,7 @@
spin_lock(&dcache_lock);
list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
- if (!atomic_read(&dentry->d_count)) {
+ if (!dentry->d_count) {
__dget_locked_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
@@ -530,7 +577,10 @@
*/
while (dentry) {
spin_lock(&dcache_lock);
- if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_count--;
+ if (dentry->d_count) {
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return;
}
@@ -562,7 +612,7 @@
* the LRU because of laziness during lookup. Do not free
* it - just keep it off the LRU list.
*/
- if (atomic_read(&dentry->d_count)) {
+ if (dentry->d_count) {
spin_unlock(&dentry->d_lock);
continue;
}
@@ -783,7 +833,7 @@
do {
struct inode *inode;
- if (atomic_read(&dentry->d_count) != 0) {
+ if (dentry->d_count != 0) {
printk(KERN_ERR
"BUG: Dentry %p{i=%lx,n=%s}"
" still in use (%d)"
@@ -792,7 +842,7 @@
dentry->d_inode ?
dentry->d_inode->i_ino : 0UL,
dentry->d_name.name,
- atomic_read(&dentry->d_count),
+ dentry->d_count,
dentry->d_sb->s_type->name,
dentry->d_sb->s_id);
BUG();
@@ -802,7 +852,9 @@
parent = NULL;
else {
parent = dentry->d_parent;
- atomic_dec(&parent->d_count);
+ spin_lock(&parent->d_lock);
+ parent->d_count--;
+ spin_unlock(&parent->d_lock);
}
list_del(&dentry->d_u.d_child);
@@ -853,7 +905,9 @@
dentry = sb->s_root;
sb->s_root = NULL;
- atomic_dec(&dentry->d_count);
+ spin_lock(&dentry->d_lock);
+ dentry->d_count--;
+ spin_unlock(&dentry->d_lock);
shrink_dcache_for_umount_subtree(dentry);
while (!hlist_empty(&sb->s_anon)) {
@@ -950,7 +1004,7 @@
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
- if (!atomic_read(&dentry->d_count)) {
+ if (!dentry->d_count) {
dentry_lru_move_tail(dentry);
found++;
} else {
@@ -1068,7 +1122,7 @@
memcpy(dname, name->name, name->len);
dname[name->len] = 0;
- atomic_set(&dentry->d_count, 1);
+ dentry->d_count = 1;
dentry->d_flags = DCACHE_UNHASHED;
spin_lock_init(&dentry->d_lock);
dentry->d_inode = NULL;
@@ -1556,7 +1610,7 @@
goto next;
}
- atomic_inc(&dentry->d_count);
+ dentry->d_count++;
found = dentry;
spin_unlock(&dentry->d_lock);
break;
@@ -1653,7 +1707,7 @@
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
isdir = S_ISDIR(dentry->d_inode->i_mode);
- if (atomic_read(&dentry->d_count) == 1) {
+ if (dentry->d_count == 1) {
dentry->d_flags &= ~DCACHE_CANT_MOUNT;
dentry_iput(dentry);
fsnotify_nameremove(dentry, isdir);
@@ -2494,11 +2548,15 @@
this_parent = dentry;
goto repeat;
}
- atomic_dec(&dentry->d_count);
+ spin_lock(&dentry->d_lock);
+ dentry->d_count--;
+ spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
next = this_parent->d_u.d_child.next;
- atomic_dec(&this_parent->d_count);
+ spin_lock(&this_parent->d_lock);
+ this_parent->d_count--;
+ spin_unlock(&this_parent->d_lock);
this_parent = this_parent->d_parent;
goto resume;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a1ed7a7..5e5c7ec 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -260,7 +260,7 @@
ecryptfs_dentry->d_parent));
lower_inode = lower_dentry->d_inode;
fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
- BUG_ON(!atomic_read(&lower_dentry->d_count));
+ BUG_ON(!lower_dentry->d_count);
ecryptfs_set_dentry_private(ecryptfs_dentry,
kmem_cache_alloc(ecryptfs_dentry_info_cache,
GFP_KERNEL));
diff --git a/fs/locks.c b/fs/locks.c
index 8729347..08415b2 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1389,7 +1389,7 @@
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
goto out;
if ((arg == F_WRLCK)
- && ((atomic_read(&dentry->d_count) > 1)
+ && ((dentry->d_count > 1)
|| (atomic_read(&inode->i_count) > 1)))
goto out;
}
diff --git a/fs/namei.c b/fs/namei.c
index f3b5ca4..cbfa5fb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2133,7 +2133,7 @@
shrink_dcache_parent(dentry);
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) == 2)
+ if (dentry->d_count == 2)
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9184c7c..12de824 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1720,7 +1720,7 @@
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) > 1) {
+ if (dentry->d_count > 1) {
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
/* Start asynchronous writeout of the inode */
@@ -1868,7 +1868,7 @@
dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
- atomic_read(&new_dentry->d_count));
+ new_dentry->d_count);
/*
* For non-directories, check whether the target is busy and if so,
@@ -1886,7 +1886,7 @@
rehash = new_dentry;
}
- if (atomic_read(&new_dentry->d_count) > 2) {
+ if (new_dentry->d_count > 2) {
int err;
/* copy the target dentry's name */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 7bdec85..8fe9eb4 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -496,7 +496,7 @@
dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- atomic_read(&dentry->d_count));
+ dentry->d_count);
nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 184938f..3a35902 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1756,8 +1756,7 @@
goto out_dput_new;
if (svc_msnfs(ffhp) &&
- ((atomic_read(&odentry->d_count) > 1)
- || (atomic_read(&ndentry->d_count) > 1))) {
+ ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
host_err = -EPERM;
goto out_dput_new;
}
@@ -1843,7 +1842,7 @@
if (type != S_IFDIR) { /* It's UNLINK */
#ifdef MSNFS
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (atomic_read(&rdentry->d_count) > 1)) {
+ (rdentry->d_count > 1)) {
host_err = -EPERM;
} else
#endif
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f804d41..d36fc7e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -838,7 +838,7 @@
static int nilfs_tree_was_touched(struct dentry *root_dentry)
{
- return atomic_read(&root_dentry->d_count) > 1;
+ return root_dentry->d_count > 1;
}
/**
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 2feb624..b0ade2d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -87,7 +87,7 @@
#endif
struct dentry {
- atomic_t d_count;
+ unsigned int d_count; /* protected by d_lock */
unsigned int d_flags; /* protected by d_lock */
spinlock_t d_lock; /* per dentry lock */
int d_mounted;
@@ -297,17 +297,28 @@
* needs and they take necessary precautions) you should hold dcache_lock
* and call dget_locked() instead of dget().
*/
-
+static inline struct dentry *dget_dlock(struct dentry *dentry)
+{
+ if (dentry) {
+ BUG_ON(!dentry->d_count);
+ dentry->d_count++;
+ }
+ return dentry;
+}
static inline struct dentry *dget(struct dentry *dentry)
{
if (dentry) {
- BUG_ON(!atomic_read(&dentry->d_count));
- atomic_inc(&dentry->d_count);
+ spin_lock(&dentry->d_lock);
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
}
return dentry;
}
extern struct dentry * dget_locked(struct dentry *);
+extern struct dentry * dget_locked_dlock(struct dentry *);
+
+extern struct dentry *dget_parent(struct dentry *dentry);
/**
* d_unhashed - is dentry hashed
@@ -338,16 +349,6 @@
spin_unlock(&dentry->d_lock);
}
-static inline struct dentry *dget_parent(struct dentry *dentry)
-{
- struct dentry *ret;
-
- spin_lock(&dentry->d_lock);
- ret = dget(dentry->d_parent);
- spin_unlock(&dentry->d_lock);
- return ret;
-}
-
extern void dput(struct dentry *);
static inline int d_mountpoint(struct dentry *dentry)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 746055b..eb7af39 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3655,9 +3655,7 @@
list_del(&cgrp->sibling);
cgroup_unlock_hierarchy(cgrp->root);
- spin_lock(&cgrp->dentry->d_lock);
d = dget(cgrp->dentry);
- spin_unlock(&d->d_lock);
cgroup_d_remove_dir(d);
dput(d);