Commit be080a6f authored by David Howells

afs: Overhaul permit caching

Overhaul permit caching in AFS by making it per-vnode and sharing permit
lists where possible.

When most of the fileserver operations are called, they return a status
structure indicating the (revised) details of the vnode or vnodes involved
in the operation.  This includes the access mask derived from the ACL
(named CallerAccess in the protocol definition file).  This is cacheable
and if the ACL changes, the server will tell us that it is breaking the
callback promise, at which point we can discard the currently cached
permits.

With this patch, the afs_permits structure has, at the end, an array of
{ key, CallerAccess } elements, sorted by key pointer.  This is then cached
in a hash table so that it can be shared between vnodes with the same
access permits.

Permit lists can only be shared if they contain the exact same set of
key->CallerAccess mappings.

Note that the hash table is global rather than being per-net_ns.  If the keys
in a permit list cross net_ns boundaries, there is no problem sharing the
cached permits, since the permits are just integer masks.

Since permit lists pin keys, the permit cache also makes it easier for a
future patch to find all occurrences of a key and remove them by means of
setting the afs_permits::invalidated flag and then clearing the appropriate
key pointer.  In such an event, memory barriers will need adding.

Lastly, the permit caching is skipped if the server has sent either a
vnode-specific or an entire-server callback since the start of the
operation.
Signed-off-by: David Howells <dhowells@redhat.com>
parent c435ee34
......@@ -136,7 +136,6 @@ struct afs_file_status {
afs_access_t caller_access; /* access rights for authenticated caller */
afs_access_t anon_access; /* access rights for unauthenticated caller */
umode_t mode; /* UNIX mode */
struct afs_fid parent; /* parent dir ID for non-dirs only */
time_t mtime_client; /* last time client changed data */
time_t mtime_server; /* last time server changed data */
s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
......
......@@ -383,7 +383,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
/* again, make sure we've got a callback on this file and, again, make
* sure that our view of the data version is up to date (we ignore
* errors incurred here and deal with the consequences elsewhere) */
afs_vnode_fetch_status(vnode, NULL, key, false);
afs_vnode_fetch_status(vnode, key, false);
error:
spin_unlock(&inode->i_lock);
......@@ -455,7 +455,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
posix_test_lock(file, fl);
if (fl->fl_type == F_UNLCK) {
/* no local locks; consult the server */
ret = afs_vnode_fetch_status(vnode, NULL, key, true);
ret = afs_vnode_fetch_status(vnode, key, true);
if (ret < 0)
goto error;
lock_count = vnode->status.lock_count;
......
......@@ -78,8 +78,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
EXTRACT(status->caller_access); /* call ticket dependent */
EXTRACT(status->anon_access);
EXTRACT(status->mode);
EXTRACT(status->parent.vnode);
EXTRACT(status->parent.unique);
bp++; /* parent.vnode */
bp++; /* parent.unique */
bp++; /* seg size */
status->mtime_client = ntohl(*bp++);
status->mtime_server = ntohl(*bp++);
......@@ -103,7 +103,6 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
status->mtime_client, status->mtime_server);
if (vnode) {
status->parent.vid = vnode->fid.vid;
if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
_debug("vnode changed");
i_size_write(&vnode->vfs_inode, size);
......
......@@ -238,7 +238,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
if (!status) {
/* it's a remotely extant inode */
ret = afs_vnode_fetch_status(vnode, NULL, key, true);
ret = afs_vnode_fetch_status(vnode, key, true);
if (ret < 0)
goto bad_inode;
} else {
......@@ -358,7 +358,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* access */
if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
_debug("not promised");
ret = afs_vnode_fetch_status(vnode, NULL, key, false);
ret = afs_vnode_fetch_status(vnode, key, false);
if (ret < 0) {
if (ret == -ENOENT) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
......@@ -431,7 +431,6 @@ int afs_drop_inode(struct inode *inode)
*/
void afs_evict_inode(struct inode *inode)
{
struct afs_permits *permits;
struct afs_vnode *vnode;
vnode = AFS_FS_I(inode);
......@@ -460,13 +459,7 @@ void afs_evict_inode(struct inode *inode)
vnode->cache = NULL;
#endif
mutex_lock(&vnode->permits_lock);
permits = vnode->permits;
RCU_INIT_POINTER(vnode->permits, NULL);
mutex_unlock(&vnode->permits_lock);
if (permits)
call_rcu(&permits->rcu, afs_zap_permits);
afs_put_permits(vnode->permit_cache);
_leave("");
}
......
......@@ -389,8 +389,7 @@ struct afs_vnode {
#ifdef CONFIG_AFS_FSCACHE
struct fscache_cookie *cache; /* caching cookie */
#endif
struct afs_permits *permits; /* cache of permits so far obtained */
struct mutex permits_lock; /* lock for altering permits list */
struct afs_permits *permit_cache; /* cache of permits so far obtained */
struct mutex validate_lock; /* lock for validating this vnode */
wait_queue_head_t update_waitq; /* status fetch waitqueue */
int update_cnt; /* number of outstanding ops that will update the
......@@ -411,8 +410,6 @@ struct afs_vnode {
#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */
#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */
long acl_order; /* ACL check count (callback break count) */
struct list_head writebacks; /* alterations in pagecache that need writing */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
......@@ -435,16 +432,21 @@ struct afs_vnode {
*/
struct afs_permit {
struct key *key; /* RxRPC ticket holding a security context */
afs_access_t access_mask; /* access mask for this key */
afs_access_t access; /* CallerAccess value for this key */
};
/*
* cache of security records from attempts to access a vnode
* Immutable cache of CallerAccess records from attempts to access vnodes.
* These may be shared between multiple vnodes.
*/
struct afs_permits {
struct rcu_head rcu; /* disposal procedure */
int count; /* number of records */
struct afs_permit permits[0]; /* the permits so far examined */
struct rcu_head rcu;
struct hlist_node hash_node; /* Link in hash */
unsigned long h; /* Hash value for this permit list */
refcount_t usage;
unsigned short nr_permits; /* Number of records */
bool invalidated; /* Invalidated due to key change */
struct afs_permit permits[]; /* List of permits sorted by key pointer */
};
/*
......@@ -682,11 +684,13 @@ static inline int afs_transfer_reply(struct afs_call *call)
/*
* security.c
*/
extern void afs_put_permits(struct afs_permits *);
extern void afs_clear_permits(struct afs_vnode *);
extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
extern int afs_permission(struct inode *, int);
extern void __exit afs_clean_up_permit_cache(void);
/*
* server.c
......@@ -757,8 +761,7 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
extern void afs_vnode_finalise_status_update(struct afs_vnode *,
struct afs_server *);
extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
struct key *, bool);
extern int afs_vnode_fetch_status(struct afs_vnode *, struct key *, bool);
extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
struct afs_read *);
extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
......
......@@ -186,6 +186,7 @@ static void __exit afs_exit(void)
destroy_workqueue(afs_vlocation_update_worker);
destroy_workqueue(afs_async_calls);
destroy_workqueue(afs_wq);
afs_clean_up_permit_cache();
rcu_barrier();
}
......
/* AFS security handling
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
......@@ -14,9 +14,13 @@
#include <linux/fs.h>
#include <linux/ctype.h>
#include <linux/sched.h>
#include <linux/hashtable.h>
#include <keys/rxrpc-type.h>
#include "internal.h"
static DEFINE_HASHTABLE(afs_permits_cache, 10);
static DEFINE_SPINLOCK(afs_permits_lock);
/*
* get a key
*/
......@@ -46,168 +50,233 @@ struct key *afs_request_key(struct afs_cell *cell)
}
/*
* dispose of a permits list
* Dispose of a list of permits.
*/
void afs_zap_permits(struct rcu_head *rcu)
static void afs_permits_rcu(struct rcu_head *rcu)
{
struct afs_permits *permits =
container_of(rcu, struct afs_permits, rcu);
int loop;
_enter("{%d}", permits->count);
int i;
for (loop = permits->count - 1; loop >= 0; loop--)
key_put(permits->permits[loop].key);
for (i = 0; i < permits->nr_permits; i++)
key_put(permits->permits[i].key);
kfree(permits);
}
/*
* dispose of a permits list in which all the key pointers have been copied
* Discard a permission cache.
*/
static void afs_dispose_of_permits(struct rcu_head *rcu)
void afs_put_permits(struct afs_permits *permits)
{
struct afs_permits *permits =
container_of(rcu, struct afs_permits, rcu);
_enter("{%d}", permits->count);
kfree(permits);
if (permits && refcount_dec_and_test(&permits->usage)) {
spin_lock(&afs_permits_lock);
hash_del_rcu(&permits->hash_node);
spin_unlock(&afs_permits_lock);
call_rcu(&permits->rcu, afs_permits_rcu);
}
}
/*
* get the authorising vnode - this is the specified inode itself if it's a
* directory or it's the parent directory if the specified inode is a file or
* symlink
* - the caller must release the ref on the inode
* Clear a permit cache on callback break.
*/
static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode,
struct key *key)
void afs_clear_permits(struct afs_vnode *vnode)
{
struct afs_vnode *auth_vnode;
struct inode *auth_inode;
_enter("");
struct afs_permits *permits;
if (S_ISDIR(vnode->vfs_inode.i_mode)) {
auth_inode = igrab(&vnode->vfs_inode);
ASSERT(auth_inode != NULL);
} else {
auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
&vnode->status.parent, NULL, NULL);
if (IS_ERR(auth_inode))
return ERR_CAST(auth_inode);
}
spin_lock(&vnode->lock);
permits = rcu_dereference_protected(vnode->permit_cache,
lockdep_is_held(&vnode->lock));
RCU_INIT_POINTER(vnode->permit_cache, NULL);
vnode->cb_break++;
spin_unlock(&vnode->lock);
auth_vnode = AFS_FS_I(auth_inode);
_leave(" = {%x}", auth_vnode->fid.vnode);
return auth_vnode;
if (permits)
afs_put_permits(permits);
}
/*
* clear the permit cache on a directory vnode
* Hash a list of permits. Use simple addition to make it easy to add an extra
* one at an as-yet indeterminate position in the list.
*/
void afs_clear_permits(struct afs_vnode *vnode)
static void afs_hash_permits(struct afs_permits *permits)
{
struct afs_permits *permits;
unsigned long h = permits->nr_permits;
int i;
_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
mutex_lock(&vnode->permits_lock);
permits = vnode->permits;
RCU_INIT_POINTER(vnode->permits, NULL);
vnode->cb_break++;
mutex_unlock(&vnode->permits_lock);
for (i = 0; i < permits->nr_permits; i++) {
h += (unsigned long)permits->permits[i].key / sizeof(void *);
h += permits->permits[i].access;
}
if (permits)
call_rcu(&permits->rcu, afs_zap_permits);
_leave("");
permits->h = h;
}
/*
* add the result obtained for a vnode to its or its parent directory's cache
* for the key used to access it
* Cache the CallerAccess result obtained from doing a fileserver operation
* that returned a vnode status for a particular key. If a callback break
* occurs whilst the operation was in progress then we have to ditch the cache
* as the ACL *may* have changed.
*/
void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
unsigned int cb_break)
{
struct afs_permits *permits, *xpermits;
struct afs_permit *permit;
struct afs_vnode *auth_vnode;
int count, loop;
struct afs_permits *permits, *xpermits, *replacement, *new = NULL;
afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);
size_t size = 0;
bool changed = false;
int i, j;
_enter("{%x:%u},%x,%x",
vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
rcu_read_lock();
/* Check for the common case first: We got back the same access as last
* time we tried and already have it recorded.
*/
permits = rcu_dereference(vnode->permit_cache);
if (permits) {
if (!permits->invalidated) {
for (i = 0; i < permits->nr_permits; i++) {
if (permits->permits[i].key < key)
continue;
if (permits->permits[i].key > key)
break;
if (permits->permits[i].access != caller_access) {
changed = true;
break;
}
_enter("{%x:%u},%x,%lx",
vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order);
if (cb_break != (vnode->cb_break +
vnode->cb_interest->server->cb_s_break)) {
changed = true;
break;
}
auth_vnode = afs_get_auth_inode(vnode, key);
if (IS_ERR(auth_vnode)) {
_leave(" [get error %ld]", PTR_ERR(auth_vnode));
return;
}
/* The cache is still good. */
rcu_read_unlock();
return;
}
}
mutex_lock(&auth_vnode->permits_lock);
changed |= permits->invalidated;
size = permits->nr_permits;
/* guard against a rename being detected whilst we waited for the
* lock */
if (memcmp(&auth_vnode->fid, &vnode->status.parent,
sizeof(struct afs_fid)) != 0) {
_debug("renamed");
goto out_unlock;
/* If this set of permits is now wrong, clear the permits
* pointer so that no one tries to use the stale information.
*/
if (changed) {
spin_lock(&vnode->lock);
if (permits != rcu_access_pointer(vnode->permit_cache))
goto someone_else_changed_it_unlock;
RCU_INIT_POINTER(vnode->permit_cache, NULL);
spin_unlock(&vnode->lock);
afs_put_permits(permits);
permits = NULL;
size = 0;
}
}
/* have to be careful as the directory's callback may be broken between
* us receiving the status we're trying to cache and us getting the
* lock to update the cache for the status */
if (auth_vnode->acl_order - acl_order > 0) {
_debug("ACL changed?");
goto out_unlock;
if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) {
rcu_read_unlock();
goto someone_else_changed_it;
}
/* always update the anonymous mask */
_debug("anon access %x", vnode->status.anon_access);
auth_vnode->status.anon_access = vnode->status.anon_access;
if (key == vnode->volume->cell->anonymous_key)
goto out_unlock;
xpermits = auth_vnode->permits;
count = 0;
if (xpermits) {
/* see if the permit is already in the list
* - if it is then we just amend the list
*/
count = xpermits->count;
permit = xpermits->permits;
for (loop = count; loop > 0; loop--) {
if (permit->key == key) {
permit->access_mask =
vnode->status.caller_access;
goto out_unlock;
/* We need a ref on any permits list we want to copy as we'll have to
* drop the lock to do memory allocation.
*/
if (permits && !refcount_inc_not_zero(&permits->usage)) {
rcu_read_unlock();
goto someone_else_changed_it;
}
rcu_read_unlock();
/* Speculatively create a new list with the revised permission set. We
* discard this if we find an extant match already in the hash, but
* it's easier to compare with memcmp this way.
*
* We fill in the key pointers at this time, but we don't get the refs
* yet.
*/
size++;
new = kzalloc(sizeof(struct afs_permits) +
sizeof(struct afs_permit) * size, GFP_NOFS);
if (!new)
return;
refcount_set(&new->usage, 1);
new->nr_permits = size;
i = j = 0;
if (permits) {
for (i = 0; i < permits->nr_permits; i++) {
if (j == i && permits->permits[i].key > key) {
new->permits[j].key = key;
new->permits[j].access = caller_access;
j++;
}
permit++;
new->permits[j].key = permits->permits[i].key;
new->permits[j].access = permits->permits[i].access;
j++;
}
}
permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1),
GFP_NOFS);
if (!permits)
goto out_unlock;
if (xpermits)
memcpy(permits->permits, xpermits->permits,
count * sizeof(struct afs_permit));
_debug("key %x access %x",
key_serial(key), vnode->status.caller_access);
permits->permits[count].access_mask = vnode->status.caller_access;
permits->permits[count].key = key_get(key);
permits->count = count + 1;
rcu_assign_pointer(auth_vnode->permits, permits);
if (xpermits)
call_rcu(&xpermits->rcu, afs_dispose_of_permits);
out_unlock:
mutex_unlock(&auth_vnode->permits_lock);
iput(&auth_vnode->vfs_inode);
_leave("");
if (j == i) {
new->permits[j].key = key;
new->permits[j].access = caller_access;
}
afs_hash_permits(new);
afs_put_permits(permits);
/* Now see if the permit list we want is actually already available */
spin_lock(&afs_permits_lock);
hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) {
if (xpermits->h != new->h ||
xpermits->invalidated ||
xpermits->nr_permits != new->nr_permits ||
memcmp(xpermits->permits, new->permits,
new->nr_permits * sizeof(struct afs_permit)) != 0)
continue;
if (refcount_inc_not_zero(&xpermits->usage)) {
replacement = xpermits;
goto found;
}
break;
}
for (i = 0; i < new->nr_permits; i++)
key_get(new->permits[i].key);
hash_add_rcu(afs_permits_cache, &new->hash_node, new->h);
replacement = new;
new = NULL;
found:
spin_unlock(&afs_permits_lock);
kfree(new);
spin_lock(&vnode->lock);
if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break) ||
permits != rcu_access_pointer(vnode->permit_cache))
goto someone_else_changed_it_unlock;
rcu_assign_pointer(vnode->permit_cache, replacement);
spin_unlock(&vnode->lock);
afs_put_permits(permits);
return;
someone_else_changed_it_unlock:
spin_unlock(&vnode->lock);
someone_else_changed_it:
/* Someone else changed the cache under us - don't recheck at this
* time.
*/
return;
}
/*
......@@ -219,55 +288,45 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
afs_access_t *_access)
{
struct afs_permits *permits;
struct afs_permit *permit;
struct afs_vnode *auth_vnode;
bool valid;
int loop, ret;
bool valid = false;
int i, ret;
_enter("{%x:%u},%x",
vnode->fid.vid, vnode->fid.vnode, key_serial(key));
auth_vnode = afs_get_auth_inode(vnode, key);
if (IS_ERR(auth_vnode)) {
*_access = 0;
_leave(" = %ld", PTR_ERR(auth_vnode));
return PTR_ERR(auth_vnode);
}
ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode));
permits = vnode->permit_cache;
/* check the permits to see if we've got one yet */
if (key == auth_vnode->volume->cell->anonymous_key) {
if (key == vnode->volume->cell->anonymous_key) {
_debug("anon");
*_access = auth_vnode->status.anon_access;
*_access = vnode->status.anon_access;
valid = true;
} else {
valid = false;
rcu_read_lock();
permits = rcu_dereference(auth_vnode->permits);
permits = rcu_dereference(vnode->permit_cache);
if (permits) {
permit = permits->permits;
for (loop = permits->count; loop > 0; loop--) {
if (permit->key == key) {
_debug("found in cache");
*_access = permit->access_mask;
valid = true;
for (i = 0; i < permits->nr_permits; i++) {
if (permits->permits[i].key < key)
continue;
if (permits->permits[i].key > key)
break;
}
permit++;
*_access = permits->permits[i].access;
valid = !permits->invalidated;
break;
}
}
rcu_read_unlock();
}
if (!valid) {
/* check the status on the file we're actually interested in
* (the post-processing will cache the result on auth_vnode) */
/* Check the status on the file we're actually interested in
* (the post-processing will cache the result).
*/
_debug("no valid permit");
ret = afs_vnode_fetch_status(vnode, auth_vnode, key, true);
ret = afs_vnode_fetch_status(vnode, key, true);
if (ret < 0) {
iput(&auth_vnode->vfs_inode);
*_access = 0;
_leave(" = %d", ret);
return ret;
......@@ -275,7 +334,6 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
*_access = vnode->status.caller_access;
}
iput(&auth_vnode->vfs_inode);
_leave(" = 0 [access %x]", *_access);
return 0;
}
......@@ -360,3 +418,12 @@ int afs_permission(struct inode *inode, int mask)
_leave(" = %d", ret);
return ret;
}
/*
 * Sanity-check the permits hash table when the module is being removed.
 *
 * Walks every bucket of afs_permits_cache and raises a WARN_ON_ONCE if a
 * bucket still has an entry linked into it.  Nothing is freed or unlinked
 * here; the function only reports leftovers.
 */
void __exit afs_clean_up_permit_cache(void)
{
	int bkt = 0;

	while (bkt < HASH_SIZE(afs_permits_cache)) {
		WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[bkt]));
		bkt++;
	}
}
......@@ -532,7 +532,6 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->vfs_inode);
init_waitqueue_head(&vnode->update_waitq);
mutex_init(&vnode->permits_lock);
mutex_init(&vnode->validate_lock);
spin_lock_init(&vnode->writeback_lock);
spin_lock_init(&vnode->lock);
......
......@@ -88,11 +88,10 @@ static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
* - there are any outstanding ops that will fetch the status
* - TODO implement local caching
*/
int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode,
struct key *key, bool force)
int afs_vnode_fetch_status(struct afs_vnode *vnode, struct key *key, bool force)
{
struct afs_server *server;
unsigned long acl_order;
unsigned int cb_break = 0;
int ret;
DECLARE_WAITQUEUE(myself, current);
......@@ -113,9 +112,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode
return -ENOENT;
}
acl_order = 0;
if (auth_vnode)
acl_order = auth_vnode->acl_order;
cb_break = vnode->cb_break + vnode->cb_s_break;
spin_lock(&vnode->lock);
......@@ -192,8 +189,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode, struct afs_vnode *auth_vnode
/* adjust the flags */
if (ret == 0) {
_debug("adjust");
if (auth_vnode)
afs_cache_permit(vnode, key, acl_order);
afs_cache_permit(vnode, key, cb_break);
afs_vnode_finalise_status_update(vnode, server);
afs_put_server(afs_v2net(vnode), server);
} else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment