Commit b04f784e authored by Nick Piggin's avatar Nick Piggin Committed by Al Viro

fs: remove extra lookup in __lookup_hash

fs: remove extra lookup in __lookup_hash

Optimize lookup for create operations, where no dentry should often be
common-case. In cases where it is not, such as unlink, the added overhead
is much smaller than the removed.

Also, move comments about __d_lookup racyness to the __d_lookup call site.
d_lookup is intuitive; __d_lookup is what needs commenting. So in that same
vein, add kerneldoc comments to __d_lookup and clean up some of the comments:

- We are interested in how the RCU lookup works here, particularly with
  renames. Make that explicit, and point to the document where it is explained
  in more detail.
- RCU is pretty standard now, and macros make implementations pretty mindless.
  If we want to know about RCU barrier details, we look in RCU code.
- Delete some boring legacy comments because we don't care much about how the
  code used to work, more about the interesting parts of how it works now. So
  comments about lazy LRU may be interesting, but would better be done in the
  LRU or refcount management code.
Signed-off-by: default avatarNick Piggin <npiggin@kernel.dk>
Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
parent 2a4419b5
...@@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci); ...@@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci);
* d_lookup - search for a dentry * d_lookup - search for a dentry
* @parent: parent dentry * @parent: parent dentry
* @name: qstr of name we wish to find * @name: qstr of name we wish to find
* Returns: dentry, or NULL
* *
* Searches the children of the parent dentry for the name in question. If * d_lookup searches the children of the parent dentry for the name in
* the dentry is found its reference count is incremented and the dentry * question. If the dentry is found its reference count is incremented and the
* is returned. The caller must use dput to free the entry when it has * dentry is returned. The caller must use dput to free the entry when it has
* finished using it. %NULL is returned on failure. * finished using it. %NULL is returned if the dentry does not exist.
*
* __d_lookup is dcache_lock free. The hash list is protected using RCU.
* Memory barriers are used while updating and doing lockless traversal.
* To avoid races with d_move while rename is happening, d_lock is used.
*
* Overflows in memcmp(), while d_move, are avoided by keeping the length
* and name pointer in one structure pointed by d_qstr.
*
* rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
* lookup is going on.
*
* The dentry unused LRU is not updated even if lookup finds the required dentry
* in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
* select_parent and __dget_locked. This laziness saves lookup from dcache_lock
* acquisition.
*
* d_lookup() is protected against the concurrent renames in some unrelated
* directory using the seqlockt_t rename_lock.
*/ */
struct dentry * d_lookup(struct dentry * parent, struct qstr * name) struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{ {
struct dentry * dentry = NULL; struct dentry * dentry = NULL;
...@@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) ...@@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
} }
EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_lookup);
/*
* __d_lookup - search for a dentry (racy)
* @parent: parent dentry
* @name: qstr of name we wish to find
* Returns: dentry, or NULL
*
* __d_lookup is like d_lookup, however it may (rarely) return a
* false-negative result due to unrelated rename activity.
*
* __d_lookup is slightly faster by avoiding rename_lock read seqlock,
* however it must be used carefully, eg. with a following d_lookup in
* the case of failure.
*
* __d_lookup callers must be commented.
*/
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{ {
unsigned int len = name->len; unsigned int len = name->len;
...@@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) ...@@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
struct hlist_node *node; struct hlist_node *node;
struct dentry *dentry; struct dentry *dentry;
/*
* The hash list is protected using RCU.
*
* Take d_lock when comparing a candidate dentry, to avoid races
* with d_move().
*
* It is possible that concurrent renames can mess up our list
* walk here and result in missing our dentry, resulting in the
* false-negative result. d_lookup() protects against concurrent
* renames using rename_lock seqlock.
*
* See Documentation/vfs/dcache-locking.txt for more details.
*/
rcu_read_lock(); rcu_read_lock();
hlist_for_each_entry_rcu(dentry, node, head, d_hash) { hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
...@@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) ...@@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
/* /*
* Recheck the dentry after taking the lock - d_move may have * Recheck the dentry after taking the lock - d_move may have
* changed things. Don't bother checking the hash because we're * changed things. Don't bother checking the hash because
* about to compare the whole name anyway. * we're about to compare the whole name anyway.
*/ */
if (dentry->d_parent != parent) if (dentry->d_parent != parent)
goto next; goto next;
......
...@@ -735,6 +735,11 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, ...@@ -735,6 +735,11 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
return err; return err;
} }
/*
* Rename seqlock is not required here because in the off chance
* of a false negative due to a concurrent rename, we're going to
* do the non-racy lookup, below.
*/
dentry = __d_lookup(nd->path.dentry, name); dentry = __d_lookup(nd->path.dentry, name);
if (!dentry) if (!dentry)
goto need_lookup; goto need_lookup;
...@@ -754,17 +759,13 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, ...@@ -754,17 +759,13 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
mutex_lock(&dir->i_mutex); mutex_lock(&dir->i_mutex);
/* /*
* First re-do the cached lookup just in case it was created * First re-do the cached lookup just in case it was created
* while we waited for the directory semaphore.. * while we waited for the directory semaphore, or the first
* * lookup failed due to an unrelated rename.
* FIXME! This could use version numbering or similar to
* avoid unnecessary cache lookups.
*
* The "dcache_lock" is purely to protect the RCU list walker
* from concurrent renames at this point (we mustn't get false
* negatives from the RCU list walk here, unlike the optimistic
* fast walk).
* *
* so doing d_lookup() (with seqlock), instead of lockfree __d_lookup * This could use version numbering or similar to avoid unnecessary
* cache lookups, but then we'd have to do the first lookup in the
* non-racy way. However in the common case here, everything should
* be hot in cache, so would it be a big win?
*/ */
dentry = d_lookup(parent, name); dentry = d_lookup(parent, name);
if (likely(!dentry)) { if (likely(!dentry)) {
...@@ -1136,13 +1137,12 @@ static struct dentry *__lookup_hash(struct qstr *name, ...@@ -1136,13 +1137,12 @@ static struct dentry *__lookup_hash(struct qstr *name,
goto out; goto out;
} }
dentry = __d_lookup(base, name); /*
* Don't bother with __d_lookup: callers are for creat as
/* lockess __d_lookup may fail due to concurrent d_move() * well as unlink, so a lot of the time it would cost
* in some unrelated directory, so try with d_lookup * a double lookup.
*/ */
if (!dentry) dentry = d_lookup(base, name);
dentry = d_lookup(base, name);
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
dentry = do_revalidate(dentry, nd); dentry = do_revalidate(dentry, nd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment