return dentry_hashtable + hash_32(hash, d_hash_shift);
}
+#define IN_LOOKUP_SHIFT 10
+static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT];
+
+static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
+ unsigned int hash)
+{
+ hash += (unsigned long) parent / L1_CACHE_BYTES;
+ return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
+}
+
+
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
.age_limit = 45,
/* Slow case: now with the dentry lock held */
rcu_read_unlock();
+ WARN_ON(d_in_lookup(dentry));
+
/* Unreachable? Get rid of it */
if (unlikely(d_unhashed(dentry)))
goto kill_it;
* be overwriting an internal NUL character
*/
dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
- if (name->len > DNAME_INLINE_LEN-1) {
+ if (unlikely(!name)) {
+ static const struct qstr anon = QSTR_INIT("/", 1);
+ name = &anon;
+ dname = dentry->d_iname;
+ } else if (name->len > DNAME_INLINE_LEN-1) {
size_t size = offsetof(struct external_name, name[1]);
struct external_name *p = kmalloc(size + name->len,
GFP_KERNEL_ACCOUNT);
struct qstr q;
q.name = name;
- q.len = strlen(name);
- q.hash = full_name_hash(q.name, q.len);
+ q.hash_len = hashlen_string(name);
return d_alloc(parent, &q);
}
EXPORT_SYMBOL(d_alloc_name);
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
unsigned add_flags = d_flags_for_inode(inode);
+ WARN_ON(d_in_lookup(dentry));
spin_lock(&dentry->d_lock);
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
{
BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
if (inode) {
+ security_d_instantiate(entry, inode);
spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
spin_unlock(&inode->i_lock);
}
- security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
{
BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
+ security_d_instantiate(entry, inode);
spin_lock(&inode->i_lock);
if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
spin_unlock(&inode->i_lock);
}
__d_instantiate(entry, inode);
spin_unlock(&inode->i_lock);
- security_d_instantiate(entry, inode);
return 0;
}
struct dentry *res = NULL;
if (root_inode) {
- static const struct qstr name = QSTR_INIT("/", 1);
-
- res = __d_alloc(root_inode->i_sb, &name);
+ res = __d_alloc(root_inode->i_sb, NULL);
if (res)
d_instantiate(res, root_inode);
else
static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
{
- static const struct qstr anonstring = QSTR_INIT("/", 1);
struct dentry *tmp;
struct dentry *res;
unsigned add_flags;
if (res)
goto out_iput;
- tmp = __d_alloc(inode->i_sb, &anonstring);
+ tmp = __d_alloc(inode->i_sb, NULL);
if (!tmp) {
res = ERR_PTR(-ENOMEM);
goto out_iput;
}
+ security_d_instantiate(tmp, inode);
spin_lock(&inode->i_lock);
res = __d_find_any_alias(inode);
if (res) {
hlist_bl_unlock(&tmp->d_sb->s_anon);
spin_unlock(&tmp->d_lock);
spin_unlock(&inode->i_lock);
- security_d_instantiate(tmp, inode);
return tmp;
out_iput:
- if (res && !IS_ERR(res))
- security_d_instantiate(res, inode);
iput(inode);
return res;
}
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
struct qstr *name)
{
- struct dentry *found;
- struct dentry *new;
+ struct dentry *found, *res;
/*
* First check if a dentry matching the name already exists,
* if not go ahead and create it now.
*/
found = d_hash_and_lookup(dentry->d_parent, name);
- if (!found) {
- new = d_alloc(dentry->d_parent, name);
- if (!new) {
- found = ERR_PTR(-ENOMEM);
- } else {
- found = d_splice_alias(inode, new);
- if (found) {
- dput(new);
- return found;
- }
- return new;
+ if (found) {
+ iput(inode);
+ return found;
+ }
+ if (d_in_lookup(dentry)) {
+ found = d_alloc_parallel(dentry->d_parent, name,
+ dentry->d_wait);
+ if (IS_ERR(found) || !d_in_lookup(found)) {
+ iput(inode);
+ return found;
}
+ } else {
+ found = d_alloc(dentry->d_parent, name);
+ if (!found) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+ res = d_splice_alias(inode, found);
+ if (res) {
+ dput(found);
+ return res;
}
- iput(inode);
return found;
}
EXPORT_SYMBOL(d_add_ci);
}
EXPORT_SYMBOL(d_rehash);
+static inline unsigned start_dir_add(struct inode *dir)
+{
+
+ for (;;) {
+ unsigned n = dir->i_dir_seq;
+ if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
+ return n;
+ cpu_relax();
+ }
+}
+
+static inline void end_dir_add(struct inode *dir, unsigned n)
+{
+ smp_store_release(&dir->i_dir_seq, n + 2);
+}
+
+static void d_wait_lookup(struct dentry *dentry)
+{
+ if (d_in_lookup(dentry)) {
+ DECLARE_WAITQUEUE(wait, current);
+ add_wait_queue(dentry->d_wait, &wait);
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&dentry->d_lock);
+ schedule();
+ spin_lock(&dentry->d_lock);
+ } while (d_in_lookup(dentry));
+ }
+}
+
+struct dentry *d_alloc_parallel(struct dentry *parent,
+ const struct qstr *name,
+ wait_queue_head_t *wq)
+{
+ unsigned int len = name->len;
+ unsigned int hash = name->hash;
+ const unsigned char *str = name->name;
+ struct hlist_bl_head *b = in_lookup_hash(parent, hash);
+ struct hlist_bl_node *node;
+ struct dentry *new = d_alloc(parent, name);
+ struct dentry *dentry;
+ unsigned seq, r_seq, d_seq;
+
+ if (unlikely(!new))
+ return ERR_PTR(-ENOMEM);
+
+retry:
+ rcu_read_lock();
+ seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
+ r_seq = read_seqbegin(&rename_lock);
+ dentry = __d_lookup_rcu(parent, name, &d_seq);
+ if (unlikely(dentry)) {
+ if (!lockref_get_not_dead(&dentry->d_lockref)) {
+ rcu_read_unlock();
+ goto retry;
+ }
+ if (read_seqcount_retry(&dentry->d_seq, d_seq)) {
+ rcu_read_unlock();
+ dput(dentry);
+ goto retry;
+ }
+ rcu_read_unlock();
+ dput(new);
+ return dentry;
+ }
+ if (unlikely(read_seqretry(&rename_lock, r_seq))) {
+ rcu_read_unlock();
+ goto retry;
+ }
+ hlist_bl_lock(b);
+ if (unlikely(parent->d_inode->i_dir_seq != seq)) {
+ hlist_bl_unlock(b);
+ rcu_read_unlock();
+ goto retry;
+ }
+ rcu_read_unlock();
+ /*
+ * No changes for the parent since the beginning of d_lookup().
+ * Since all removals from the chain happen with hlist_bl_lock(),
+ * any potential in-lookup matches are going to stay here until
+ * we unlock the chain. All fields are stable in everything
+ * we encounter.
+ */
+ hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
+ if (dentry->d_name.hash != hash)
+ continue;
+ if (dentry->d_parent != parent)
+ continue;
+ if (d_unhashed(dentry))
+ continue;
+ if (parent->d_flags & DCACHE_OP_COMPARE) {
+ int tlen = dentry->d_name.len;
+ const char *tname = dentry->d_name.name;
+ if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
+ continue;
+ } else {
+ if (dentry->d_name.len != len)
+ continue;
+ if (dentry_cmp(dentry, str, len))
+ continue;
+ }
+ dget(dentry);
+ hlist_bl_unlock(b);
+ /* somebody is doing lookup for it right now; wait for it */
+ spin_lock(&dentry->d_lock);
+ d_wait_lookup(dentry);
+ /*
+ * it's not in-lookup anymore; in principle we should repeat
+ * everything from dcache lookup, but it's likely to be what
+ * d_lookup() would've found anyway. If it is, just return it;
+ * otherwise we really have to repeat the whole thing.
+ */
+ if (unlikely(dentry->d_name.hash != hash))
+ goto mismatch;
+ if (unlikely(dentry->d_parent != parent))
+ goto mismatch;
+ if (unlikely(d_unhashed(dentry)))
+ goto mismatch;
+ if (parent->d_flags & DCACHE_OP_COMPARE) {
+ int tlen = dentry->d_name.len;
+ const char *tname = dentry->d_name.name;
+ if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
+ goto mismatch;
+ } else {
+ if (unlikely(dentry->d_name.len != len))
+ goto mismatch;
+ if (unlikely(dentry_cmp(dentry, str, len)))
+ goto mismatch;
+ }
+ /* OK, it *is* a hashed match; return it */
+ spin_unlock(&dentry->d_lock);
+ dput(new);
+ return dentry;
+ }
+ /* we can't take ->d_lock here; it's OK, though. */
+ new->d_flags |= DCACHE_PAR_LOOKUP;
+ new->d_wait = wq;
+ hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
+ hlist_bl_unlock(b);
+ return new;
+mismatch:
+ spin_unlock(&dentry->d_lock);
+ dput(dentry);
+ goto retry;
+}
+EXPORT_SYMBOL(d_alloc_parallel);
+
+void __d_lookup_done(struct dentry *dentry)
+{
+ struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
+ dentry->d_name.hash);
+ hlist_bl_lock(b);
+ dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
+ __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
+ wake_up_all(dentry->d_wait);
+ dentry->d_wait = NULL;
+ hlist_bl_unlock(b);
+ INIT_HLIST_NODE(&dentry->d_u.d_alias);
+ INIT_LIST_HEAD(&dentry->d_lru);
+}
+EXPORT_SYMBOL(__d_lookup_done);
/* inode->i_lock held if inode is non-NULL */
static inline void __d_add(struct dentry *dentry, struct inode *inode)
{
+ struct inode *dir = NULL;
+ unsigned n;
+ spin_lock(&dentry->d_lock);
+ if (unlikely(d_in_lookup(dentry))) {
+ dir = dentry->d_parent->d_inode;
+ n = start_dir_add(dir);
+ __d_lookup_done(dentry);
+ }
if (inode) {
- __d_instantiate(dentry, inode);
- spin_unlock(&inode->i_lock);
+ unsigned add_flags = d_flags_for_inode(inode);
+ hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+ raw_write_seqcount_begin(&dentry->d_seq);
+ __d_set_inode_and_type(dentry, inode, add_flags);
+ raw_write_seqcount_end(&dentry->d_seq);
+ __fsnotify_d_instantiate(dentry);
}
- security_d_instantiate(dentry, inode);
- d_rehash(dentry);
+ _d_rehash(dentry);
+ if (dir)
+ end_dir_add(dir, n);
+ spin_unlock(&dentry->d_lock);
+ if (inode)
+ spin_unlock(&inode->i_lock);
}
/**
void d_add(struct dentry *entry, struct inode *inode)
{
- if (inode)
+ if (inode) {
+ security_d_instantiate(entry, inode);
spin_lock(&inode->i_lock);
+ }
__d_add(entry, inode);
}
EXPORT_SYMBOL(d_add);
static void __d_move(struct dentry *dentry, struct dentry *target,
bool exchange)
{
+ struct inode *dir = NULL;
+ unsigned n;
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
BUG_ON(d_ancestor(target, dentry));
dentry_lock_for_move(dentry, target);
+ if (unlikely(d_in_lookup(target))) {
+ dir = target->d_parent->d_inode;
+ n = start_dir_add(dir);
+ __d_lookup_done(target);
+ }
write_seqcount_begin(&dentry->d_seq);
write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
write_seqcount_end(&target->d_seq);
write_seqcount_end(&dentry->d_seq);
+ if (dir)
+ end_dir_add(dir, n);
dentry_unlock_for_move(dentry, target);
}
static int __d_unalias(struct inode *inode,
struct dentry *dentry, struct dentry *alias)
{
- struct mutex *m1 = NULL, *m2 = NULL;
+ struct mutex *m1 = NULL;
+ struct rw_semaphore *m2 = NULL;
int ret = -ESTALE;
/* If alias and dentry share a parent, then no extra locks required */
if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
goto out_err;
m1 = &dentry->d_sb->s_vfs_rename_mutex;
- if (!inode_trylock(alias->d_parent->d_inode))
+ if (!inode_trylock_shared(alias->d_parent->d_inode))
goto out_err;
- m2 = &alias->d_parent->d_inode->i_mutex;
+ m2 = &alias->d_parent->d_inode->i_rwsem;
out_unalias:
__d_move(alias, dentry, false);
ret = 0;
out_err:
if (m2)
- mutex_unlock(m2);
+ up_read(m2);
if (m1)
mutex_unlock(m1);
return ret;
if (!inode)
goto out;
+ security_d_instantiate(dentry, inode);
spin_lock(&inode->i_lock);
if (S_ISDIR(inode->i_mode)) {
struct dentry *new = __d_find_any_alias(inode);
} else {
__d_move(new, dentry, false);
write_sequnlock(&rename_lock);
- security_d_instantiate(new, inode);
}
iput(inode);
return new;
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
#include <linux/hash.h>
+ #include <linux/bitops.h>
#include <asm/uaccess.h>
#include "internal.h"
if (!acl)
return -EAGAIN;
/* no ->get_acl() calls in RCU mode... */
- if (acl == ACL_NOT_CACHED)
+ if (is_uncached_acl(acl))
return -ECHILD;
return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
}
struct dentry *dir,
unsigned int flags)
{
- struct dentry *dentry;
- inode_lock(dir->d_inode);
- dentry = d_lookup(dir, name);
- if (unlikely(dentry)) {
+ struct dentry *dentry = ERR_PTR(-ENOENT), *old;
+ struct inode *inode = dir->d_inode;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ inode_lock_shared(inode);
+ /* Don't go there if it's already dead */
+ if (unlikely(IS_DEADDIR(inode)))
+ goto out;
+again:
+ dentry = d_alloc_parallel(dir, name, &wq);
+ if (IS_ERR(dentry))
+ goto out;
+ if (unlikely(!d_in_lookup(dentry))) {
if ((dentry->d_flags & DCACHE_OP_REVALIDATE) &&
!(flags & LOOKUP_NO_REVAL)) {
int error = d_revalidate(dentry, flags);
if (unlikely(error <= 0)) {
- if (!error)
+ if (!error) {
d_invalidate(dentry);
+ dput(dentry);
+ goto again;
+ }
dput(dentry);
dentry = ERR_PTR(error);
}
}
- if (dentry) {
- inode_unlock(dir->d_inode);
- return dentry;
+ } else {
+ old = inode->i_op->lookup(inode, dentry, flags);
+ d_lookup_done(dentry);
+ if (unlikely(old)) {
+ dput(dentry);
+ dentry = old;
}
}
- dentry = d_alloc(dir, name);
- if (unlikely(!dentry)) {
- inode_unlock(dir->d_inode);
- return ERR_PTR(-ENOMEM);
- }
- dentry = lookup_real(dir->d_inode, dentry, flags);
- inode_unlock(dir->d_inode);
+out:
+ inode_unlock_shared(inode);
return dentry;
}
#include <asm/word-at-a-time.h>
- #ifdef CONFIG_64BIT
+ #ifdef HASH_MIX
- static inline unsigned int fold_hash(unsigned long hash)
- {
- return hash_64(hash, 32);
- }
+ /* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */
+ #elif defined(CONFIG_64BIT)
/*
- * This is George Marsaglia's XORSHIFT generator.
- * It implements a maximum-period LFSR in only a few
- * instructions. It also has the property (required
- * by hash_name()) that mix_hash(0) = 0.
+ * Register pressure in the mixing function is an issue, particularly
+ * on 32-bit x86, but almost any function requires one state value and
+ * one temporary. Instead, use a function designed for two state values
+ * and no temporaries.
+ *
+ * This function cannot create a collision in only two iterations, so
+ * we have two iterations to achieve avalanche. In those two iterations,
+ * we have six layers of mixing, which is enough to spread one bit's
+ * influence out to 2^6 = 64 state bits.
+ *
+ * Rotate constants are scored by considering either 64 one-bit input
+ * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
+ * probability of that delta causing a change to each of the 128 output
+ * bits, using a sample of random initial states.
+ *
+ * The Shannon entropy of the computed probabilities is then summed
+ * to produce a score. Ideally, any input change has a 50% chance of
+ * toggling any given output bit.
+ *
+ * Mixing scores (in bits) for (12,45):
+ * Input delta: 1-bit 2-bit
+ * 1 round: 713.3 42542.6
+ * 2 rounds: 2753.7 140389.8
+ * 3 rounds: 5954.1 233458.2
+ * 4 rounds: 7862.6 256672.2
+ * Perfect: 8192 258048
+ * (64*128) (64*63/2 * 128)
*/
- static inline unsigned long mix_hash(unsigned long hash)
+ #define HASH_MIX(x, y, a) \
+ ( x ^= (a), \
+ y ^= x, x = rol64(x,12),\
+ x += y, y = rol64(y,45),\
+ y *= 9 )
+
+ /*
+ * Fold two longs into one 32-bit hash value. This must be fast, but
+ * latency isn't quite as critical, as there is a fair bit of additional
+ * work done before the hash value is used.
+ */
+ static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{
- hash ^= hash << 13;
- hash ^= hash >> 7;
- hash ^= hash << 17;
- return hash;
+ y ^= x * GOLDEN_RATIO_64;
+ y *= GOLDEN_RATIO_64;
+ return y >> 32;
}
#else /* 32-bit case */
- #define fold_hash(x) (x)
+ /*
+ * Mixing scores (in bits) for (7,20):
+ * Input delta: 1-bit 2-bit
+ * 1 round: 330.3 9201.6
+ * 2 rounds: 1246.4 25475.4
+ * 3 rounds: 1907.1 31295.1
+ * 4 rounds: 2042.3 31718.6
+ * Perfect: 2048 31744
+ * (32*64) (32*31/2 * 64)
+ */
+ #define HASH_MIX(x, y, a) \
+ ( x ^= (a), \
+ y ^= x, x = rol32(x, 7),\
+ x += y, y = rol32(y,20),\
+ y *= 9 )
- static inline unsigned long mix_hash(unsigned long hash)
+ static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{
- hash ^= hash << 13;
- hash ^= hash >> 17;
- hash ^= hash << 5;
- return hash;
+ /* Use arch-optimized multiply if one exists */
+ return __hash_32(y ^ __hash_32(x));
}
#endif
- unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+ /*
+ * Return the hash of a string of known length. This is carfully
+ * designed to match hash_name(), which is the more critical function.
+ * In particular, we must end by hashing a final word containing 0..7
+ * payload bytes, to match the way that hash_name() iterates until it
+ * finds the delimiter after the name.
+ */
+ unsigned int full_name_hash(const char *name, unsigned int len)
{
- unsigned long a, hash = 0;
+ unsigned long a, x = 0, y = 0;
for (;;) {
+ if (!len)
+ goto done;
a = load_unaligned_zeropad(name);
if (len < sizeof(unsigned long))
break;
- hash = mix_hash(hash + a);
+ HASH_MIX(x, y, a);
name += sizeof(unsigned long);
len -= sizeof(unsigned long);
- if (!len)
- goto done;
}
- hash += a & bytemask_from_count(len);
+ x ^= a & bytemask_from_count(len);
done:
- return fold_hash(hash);
+ return fold_hash(x, y);
}
EXPORT_SYMBOL(full_name_hash);
+ /* Return the "hash_len" (hash and length) of a null-terminated string */
+ u64 hashlen_string(const char *name)
+ {
+ unsigned long a = 0, x = 0, y = 0, adata, mask, len;
+ const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+
+ len = -sizeof(unsigned long);
+ do {
+ HASH_MIX(x, y, a);
+ len += sizeof(unsigned long);
+ a = load_unaligned_zeropad(name+len);
+ } while (!has_zero(a, &adata, &constants));
+
+ adata = prep_zero_mask(a, adata, &constants);
+ mask = create_zero_mask(adata);
+ x ^= a & zero_bytemask(mask);
+
+ return hashlen_create(fold_hash(x, y), len + find_zero(mask));
+ }
+ EXPORT_SYMBOL(hashlen_string);
+
/*
* Calculate the length and hash of the path component, and
* return the "hash_len" as the result.
*/
static inline u64 hash_name(const char *name)
{
- unsigned long a, b, adata, bdata, mask, hash, len;
+ unsigned long a = 0, b, x = 0, y = 0, adata, bdata, mask, len;
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
- hash = a = 0;
len = -sizeof(unsigned long);
do {
- hash = mix_hash(hash + a);
+ HASH_MIX(x, y, a);
len += sizeof(unsigned long);
a = load_unaligned_zeropad(name+len);
b = a ^ REPEAT_BYTE('/');
adata = prep_zero_mask(a, adata, &constants);
bdata = prep_zero_mask(b, bdata, &constants);
-
mask = create_zero_mask(adata | bdata);
+ x ^= a & zero_bytemask(mask);
- hash += a & zero_bytemask(mask);
- len += find_zero(mask);
- return hashlen_create(fold_hash(hash), len);
+ return hashlen_create(fold_hash(x, y), len + find_zero(mask));
}
- #else
+ #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
- unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+ /* Return the hash of a string of known length */
+ unsigned int full_name_hash(const char *name, unsigned int len)
{
unsigned long hash = init_name_hash();
while (len--)
- hash = partial_name_hash(*name++, hash);
+ hash = partial_name_hash((unsigned char)*name++, hash);
return end_name_hash(hash);
}
EXPORT_SYMBOL(full_name_hash);
+ /* Return the "hash_len" (hash and length) of a null-terminated string */
+ u64 hash_string(const char *name)
+ {
+ unsigned long hash = init_name_hash();
+ unsigned long len = 0, c;
+
+ c = (unsigned char)*name;
+ do {
+ len++;
+ hash = partial_name_hash(c, hash);
+ c = (unsigned char)name[len];
+ } while (c);
+ return hashlen_create(end_name_hash(hash), len);
+ }
+ EXPORT_SYMBOL(hash_string);
+
/*
* We know there's a real path component here of at least
* one character.
int type;
err = may_lookup(nd);
- if (err)
+ if (err)
return err;
hash_len = hash_name(name);
return NULL;
}
- mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
+ mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
p = d_ancestor(p2, p1);
if (p) {
inode_unlock(p1->d_inode);
if (p1 != p2) {
inode_unlock(p2->d_inode);
- mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
+ mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
}
}
EXPORT_SYMBOL(unlock_rename);
return flag;
}
-static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
+static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t mode)
{
int error = security_path_mknod(dir, dentry, mode, 0);
if (error)
static int atomic_open(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct file *file,
const struct open_flags *op,
- bool got_write, bool need_lookup,
+ int open_flag, umode_t mode,
int *opened)
{
+ struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
struct inode *dir = nd->path.dentry->d_inode;
- unsigned open_flag = open_to_namei_flags(op->open_flag);
- umode_t mode;
int error;
- int acc_mode;
- int create_error = 0;
- struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
- bool excl;
-
- BUG_ON(dentry->d_inode);
-
- /* Don't create child dentry for a dead directory. */
- if (unlikely(IS_DEADDIR(dir))) {
- error = -ENOENT;
- goto out;
- }
- mode = op->mode;
- if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
- mode &= ~current_umask();
-
- excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT);
- if (excl)
+ if (!(~open_flag & (O_EXCL | O_CREAT))) /* both O_EXCL and O_CREAT */
open_flag &= ~O_TRUNC;
- /*
- * Checking write permission is tricky, bacuse we don't know if we are
- * going to actually need it: O_CREAT opens should work as long as the
- * file exists. But checking existence breaks atomicity. The trick is
- * to check access and if not granted clear O_CREAT from the flags.
- *
- * Another problem is returing the "right" error value (e.g. for an
- * O_EXCL open we want to return EEXIST not EROFS).
- */
- if (((open_flag & (O_CREAT | O_TRUNC)) ||
- (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) {
- if (!(open_flag & O_CREAT)) {
- /*
- * No O_CREATE -> atomicity not a requirement -> fall
- * back to lookup + open
- */
- goto no_open;
- } else if (open_flag & (O_EXCL | O_TRUNC)) {
- /* Fall back and fail with the right error */
- create_error = -EROFS;
- goto no_open;
- } else {
- /* No side effects, safe to clear O_CREAT */
- create_error = -EROFS;
- open_flag &= ~O_CREAT;
- }
- }
-
- if (open_flag & O_CREAT) {
- error = may_o_create(&nd->path, dentry, mode);
- if (error) {
- create_error = error;
- if (open_flag & O_EXCL)
- goto no_open;
- open_flag &= ~O_CREAT;
- }
- }
-
if (nd->flags & LOOKUP_DIRECTORY)
open_flag |= O_DIRECTORY;
file->f_path.dentry = DENTRY_NOT_SET;
file->f_path.mnt = nd->path.mnt;
- error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode,
- opened);
- if (error < 0) {
- if (create_error && error == -ENOENT)
- error = create_error;
- goto out;
- }
-
- if (error) { /* returned 1, that is */
+ error = dir->i_op->atomic_open(dir, dentry, file,
+ open_to_namei_flags(open_flag),
+ mode, opened);
+ d_lookup_done(dentry);
+ if (!error) {
+ /*
+ * We didn't have the inode before the open, so check open
+ * permission here.
+ */
+ int acc_mode = op->acc_mode;
+ if (*opened & FILE_CREATED) {
+ WARN_ON(!(open_flag & O_CREAT));
+ fsnotify_create(dir, dentry);
+ acc_mode = 0;
+ }
+ error = may_open(&file->f_path, acc_mode, open_flag);
+ if (WARN_ON(error > 0))
+ error = -EINVAL;
+ } else if (error > 0) {
if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
error = -EIO;
- goto out;
- }
- if (file->f_path.dentry) {
- dput(dentry);
- dentry = file->f_path.dentry;
- }
- if (*opened & FILE_CREATED)
- fsnotify_create(dir, dentry);
- if (!dentry->d_inode) {
- WARN_ON(*opened & FILE_CREATED);
- if (create_error) {
- error = create_error;
- goto out;
- }
} else {
- if (excl && !(*opened & FILE_CREATED)) {
- error = -EEXIST;
- goto out;
+ if (file->f_path.dentry) {
+ dput(dentry);
+ dentry = file->f_path.dentry;
}
+ if (*opened & FILE_CREATED)
+ fsnotify_create(dir, dentry);
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
+ return 1;
}
- goto looked_up;
- }
-
- /*
- * We didn't have the inode before the open, so check open permission
- * here.
- */
- acc_mode = op->acc_mode;
- if (*opened & FILE_CREATED) {
- WARN_ON(!(open_flag & O_CREAT));
- fsnotify_create(dir, dentry);
- acc_mode = 0;
}
- error = may_open(&file->f_path, acc_mode, open_flag);
- if (error)
- fput(file);
-
-out:
dput(dentry);
return error;
-
-no_open:
- if (need_lookup) {
- dentry = lookup_real(dir, dentry, nd->flags);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- }
- if (create_error && !dentry->d_inode) {
- error = create_error;
- goto out;
- }
-looked_up:
- path->dentry = dentry;
- path->mnt = nd->path.mnt;
- return 1;
}
/*
{
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
+ int open_flag = op->open_flag;
struct dentry *dentry;
- int error;
- bool need_lookup = false;
+ int error, create_error = 0;
+ umode_t mode = op->mode;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+ if (unlikely(IS_DEADDIR(dir_inode)))
+ return -ENOENT;
*opened &= ~FILE_CREATED;
- dentry = lookup_dcache(&nd->last, dir, nd->flags);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ dentry = d_lookup(dir, &nd->last);
+ for (;;) {
+ if (!dentry) {
+ dentry = d_alloc_parallel(dir, &nd->last, &wq);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ }
+ if (d_in_lookup(dentry))
+ break;
- if (!dentry) {
- dentry = d_alloc(dir, &nd->last);
- if (unlikely(!dentry))
- return -ENOMEM;
- need_lookup = true;
- } else if (dentry->d_inode) {
+ if (!(dentry->d_flags & DCACHE_OP_REVALIDATE))
+ break;
+
+ error = d_revalidate(dentry, nd->flags);
+ if (likely(error > 0))
+ break;
+ if (error)
+ goto out_dput;
+ d_invalidate(dentry);
+ dput(dentry);
+ dentry = NULL;
+ }
+ if (dentry->d_inode) {
/* Cached positive dentry: will open in f_op->open */
goto out_no_open;
}
- if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
- return atomic_open(nd, dentry, path, file, op, got_write,
- need_lookup, opened);
+ /*
+ * Checking write permission is tricky, bacuse we don't know if we are
+ * going to actually need it: O_CREAT opens should work as long as the
+ * file exists. But checking existence breaks atomicity. The trick is
+ * to check access and if not granted clear O_CREAT from the flags.
+ *
+ * Another problem is returing the "right" error value (e.g. for an
+ * O_EXCL open we want to return EEXIST not EROFS).
+ */
+ if (open_flag & O_CREAT) {
+ if (!IS_POSIXACL(dir->d_inode))
+ mode &= ~current_umask();
+ if (unlikely(!got_write)) {
+ create_error = -EROFS;
+ open_flag &= ~O_CREAT;
+ if (open_flag & (O_EXCL | O_TRUNC))
+ goto no_open;
+ /* No side effects, safe to clear O_CREAT */
+ } else {
+ create_error = may_o_create(&nd->path, dentry, mode);
+ if (create_error) {
+ open_flag &= ~O_CREAT;
+ if (open_flag & O_EXCL)
+ goto no_open;
+ }
+ }
+ } else if ((open_flag & (O_TRUNC|O_WRONLY|O_RDWR)) &&
+ unlikely(!got_write)) {
+ /*
+ * No O_CREATE -> atomicity not a requirement -> fall
+ * back to lookup + open
+ */
+ goto no_open;
}
- if (need_lookup) {
- BUG_ON(dentry->d_inode);
+ if (dir_inode->i_op->atomic_open) {
+ error = atomic_open(nd, dentry, path, file, op, open_flag,
+ mode, opened);
+ if (unlikely(error == -ENOENT) && create_error)
+ error = create_error;
+ return error;
+ }
- dentry = lookup_real(dir_inode, dentry, nd->flags);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+no_open:
+ if (d_in_lookup(dentry)) {
+ struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
+ nd->flags);
+ d_lookup_done(dentry);
+ if (unlikely(res)) {
+ if (IS_ERR(res)) {
+ error = PTR_ERR(res);
+ goto out_dput;
+ }
+ dput(dentry);
+ dentry = res;
+ }
}
/* Negative dentry, just create the file */
- if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
- umode_t mode = op->mode;
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
- /*
- * This write is needed to ensure that a
- * rw->ro transition does not occur between
- * the time when the file is created and when
- * a permanent write count is taken through
- * the 'struct file' in finish_open().
- */
- if (!got_write) {
- error = -EROFS;
- goto out_dput;
- }
+ if (!dentry->d_inode && (open_flag & O_CREAT)) {
*opened |= FILE_CREATED;
- error = security_path_mknod(&nd->path, dentry, mode, 0);
- if (error)
+ audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
+ if (!dir_inode->i_op->create) {
+ error = -EACCES;
goto out_dput;
- error = vfs_create(dir->d_inode, dentry, mode,
- nd->flags & LOOKUP_EXCL);
+ }
+ error = dir_inode->i_op->create(dir_inode, dentry, mode,
+ open_flag & O_EXCL);
if (error)
goto out_dput;
+ fsnotify_create(dir_inode, dentry);
+ }
+ if (unlikely(create_error) && !dentry->d_inode) {
+ error = create_error;
+ goto out_dput;
}
out_no_open:
path->dentry = dentry;
}
retry_lookup:
- if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
+ if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
error = mnt_want_write(nd->path.mnt);
if (!error)
got_write = true;
* dropping this one anyway.
*/
}
- inode_lock(dir->d_inode);
+ if (open_flag & O_CREAT)
+ inode_lock(dir->d_inode);
+ else
+ inode_lock_shared(dir->d_inode);
error = lookup_open(nd, &path, file, op, got_write, opened);
- inode_unlock(dir->d_inode);
+ if (open_flag & O_CREAT)
+ inode_unlock(dir->d_inode);
+ else
+ inode_unlock_shared(dir->d_inode);
if (error <= 0) {
if (error)
return error;
}
audit_inode(nd->name, nd->path.dentry, 0);
- if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
- error = -ELOOP;
- goto out;
- }
error = -EISDIR;
if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
goto out;
got_write = true;
}
finish_open_created:
- if (likely(!(open_flag & O_PATH))) {
- error = may_open(&nd->path, acc_mode, open_flag);
- if (error)
- goto out;
- }
+ error = may_open(&nd->path, acc_mode, open_flag);
+ if (error)
+ goto out;
BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
error = vfs_open(&nd->path, file, current_cred());
if (!error) {
}
opened:
error = open_check_o_direct(file);
- if (error)
- goto exit_fput;
- error = ima_file_check(file, op->acc_mode, *opened);
- if (error)
- goto exit_fput;
-
- if (will_truncate) {
+ if (!error)
+ error = ima_file_check(file, op->acc_mode, *opened);
+ if (!error && will_truncate)
error = handle_truncate(file);
- if (error)
- goto exit_fput;
- }
out:
+ if (unlikely(error) && (*opened & FILE_OPENED))
+ fput(file);
if (unlikely(error > 0)) {
WARN_ON(1);
error = -EINVAL;
path_put(&save_parent);
return error;
-exit_fput:
- fput(file);
- goto out;
-
stale_open:
/* If no saved parent or already retried then can't retry */
if (!save_parent.dentry || retried)
return error;
}
+static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
+{
+ struct path path;
+ int error = path_lookupat(nd, flags, &path);
+ if (!error) {
+ audit_inode(nd->name, path.dentry, 0);
+ error = vfs_open(&path, file, current_cred());
+ path_put(&path);
+ }
+ return error;
+}
+
static struct file *path_openat(struct nameidata *nd,
const struct open_flags *op, unsigned flags)
{
goto out2;
}
+ if (unlikely(file->f_flags & O_PATH)) {
+ error = do_o_path(nd, flags, file);
+ if (!error)
+ opened |= FILE_OPENED;
+ goto out2;
+ }
+
s = path_init(nd, flags);
if (IS_ERR(s)) {
put_filp(file);
switch (mode & S_IFMT) {
case 0: case S_IFREG:
error = vfs_create(path.dentry->d_inode,dentry,mode,true);
+ if (!error)
+ ima_post_path_mknod(dentry);
break;
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(path.dentry->d_inode,dentry,mode,
out:
return len;
}
-EXPORT_SYMBOL(readlink_copy);
/*
* A helper for ->readlink(). This should be used *ONLY* for symlinks that