Commit 8a070a96 authored by David Howells

afs: Detect cell aliases 1 - Cells with root volumes

Put in the first phase of cell alias detection.  This part handles alias
detection for cells that have root.cell volumes (which is expected to be
likely).

When a cell becomes newly active, it is probed for its root.cell volume,
and if it has one, this volume is compared against other root.cell volumes
to find out if the lists of fileserver UUIDs have any in common - and if
that's the case, whether the address lists of those fileservers have any
addresses in common.  If they do, the new cell is adjudged to be an alias
of the old cell and the old cell is used instead.

Comparing is aided by the server list in struct afs_server_list being
sorted in UUID order and the addresses in the fileserver address lists
being sorted in address order.

The cell then retains the afs_volume object for the root.cell volume, even
if it's not mounted, for future alias checking.

This is necessary because:

 (1) Whilst fileservers have UUIDs that are meant to be globally unique, in
     practice they are not because cells get cloned without changing the
     UUIDs - so afs_server records need to be per cell.

 (2) Sometimes the DNS is used to make cell aliases - but if we don't know
     they're the same, we may end up with multiple superblocks and multiple
     afs_server records for the same thing, impairing our ability to
     deliver callback notifications of third party changes.

 (3) The fileserver RPC API doesn't contain the cell name, so it can't tell
     us which cell it's notifying and can't see that a change made to
     one cell should notify the same client that's also accessed as the
     other cell.
Reported-by: Jeffrey Altman <jaltman@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
parent c3e9f888
...@@ -31,6 +31,7 @@ kafs-y := \ ...@@ -31,6 +31,7 @@ kafs-y := \
server_list.o \ server_list.o \
super.o \ super.o \
vlclient.o \ vlclient.o \
vl_alias.o \
vl_list.o \ vl_list.o \
vl_probe.o \ vl_probe.o \
vl_rotate.o \ vl_rotate.o \
......
...@@ -164,6 +164,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, ...@@ -164,6 +164,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
INIT_LIST_HEAD(&cell->proc_volumes); INIT_LIST_HEAD(&cell->proc_volumes);
rwlock_init(&cell->proc_lock); rwlock_init(&cell->proc_lock);
rwlock_init(&cell->vl_servers_lock); rwlock_init(&cell->vl_servers_lock);
cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS);
/* Provide a VL server list, filling it in if we were given a list of /* Provide a VL server list, filling it in if we were given a list of
* addresses to use. * addresses to use.
...@@ -481,7 +482,9 @@ static void afs_cell_destroy(struct rcu_head *rcu) ...@@ -481,7 +482,9 @@ static void afs_cell_destroy(struct rcu_head *rcu)
ASSERTCMP(atomic_read(&cell->usage), ==, 0); ASSERTCMP(atomic_read(&cell->usage), ==, 0);
afs_put_volume(cell->net, cell->root_volume);
afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
afs_put_cell(cell->net, cell->alias_of);
key_put(cell->anonymous_key); key_put(cell->anonymous_key);
kfree(cell); kfree(cell);
......
...@@ -269,6 +269,7 @@ struct afs_net { ...@@ -269,6 +269,7 @@ struct afs_net {
struct timer_list cells_timer; struct timer_list cells_timer;
atomic_t cells_outstanding; atomic_t cells_outstanding;
seqlock_t cells_lock; seqlock_t cells_lock;
struct mutex cells_alias_lock;
struct mutex proc_cells_lock; struct mutex proc_cells_lock;
struct hlist_head proc_cells; struct hlist_head proc_cells;
...@@ -342,8 +343,10 @@ enum afs_cell_state { ...@@ -342,8 +343,10 @@ enum afs_cell_state {
* for authentication and encryption. The cell name is not typically used in * for authentication and encryption. The cell name is not typically used in
* the protocol. * the protocol.
* *
* There is no easy way to determine if two cells are aliases or one is a * Two cells are determined to be aliases if they have an explicit alias (YFS
* subset of another. * only), share any VL servers in common or have at least one volume in common.
* "In common" means that the address list of the VL servers or the fileservers
* share at least one endpoint.
*/ */
struct afs_cell { struct afs_cell {
union { union {
...@@ -351,6 +354,8 @@ struct afs_cell { ...@@ -351,6 +354,8 @@ struct afs_cell {
struct rb_node net_node; /* Node in net->cells */ struct rb_node net_node; /* Node in net->cells */
}; };
struct afs_net *net; struct afs_net *net;
struct afs_cell *alias_of; /* The cell this is an alias of */
struct afs_volume *root_volume; /* The root.cell volume if there is one */
struct key *anonymous_key; /* anonymous user key for this cell */ struct key *anonymous_key; /* anonymous user key for this cell */
struct work_struct manager; /* Manager for init/deinit/dns */ struct work_struct manager; /* Manager for init/deinit/dns */
struct hlist_node proc_link; /* /proc cell list link */ struct hlist_node proc_link; /* /proc cell list link */
...@@ -363,6 +368,7 @@ struct afs_cell { ...@@ -363,6 +368,7 @@ struct afs_cell {
unsigned long flags; unsigned long flags;
#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */ #define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */
#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */ #define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */
#define AFS_CELL_FL_CHECK_ALIAS 2 /* Need to check for aliases */
enum afs_cell_state state; enum afs_cell_state state;
short error; short error;
enum dns_record_source dns_source:8; /* Latest source of data from lookup */ enum dns_record_source dns_source:8; /* Latest source of data from lookup */
...@@ -584,7 +590,7 @@ struct afs_volume { ...@@ -584,7 +590,7 @@ struct afs_volume {
#ifdef CONFIG_AFS_FSCACHE #ifdef CONFIG_AFS_FSCACHE
struct fscache_cookie *cache; /* caching cookie */ struct fscache_cookie *cache; /* caching cookie */
#endif #endif
struct afs_server_list *servers; /* List of servers on which volume resides */ struct afs_server_list __rcu *servers; /* List of servers on which volume resides */
rwlock_t servers_lock; /* Lock for ->servers */ rwlock_t servers_lock; /* Lock for ->servers */
unsigned int servers_seq; /* Incremented each time ->servers changes */ unsigned int servers_seq; /* Incremented each time ->servers changes */
...@@ -1376,6 +1382,11 @@ extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_add ...@@ -1376,6 +1382,11 @@ extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_add
extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *); extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *);
/*
* vl_alias.c
*/
extern int afs_cell_detect_alias(struct afs_cell *, struct key *);
/* /*
* vl_probe.c * vl_probe.c
*/ */
......
...@@ -82,6 +82,7 @@ static int __net_init afs_net_init(struct net *net_ns) ...@@ -82,6 +82,7 @@ static int __net_init afs_net_init(struct net *net_ns)
INIT_WORK(&net->cells_manager, afs_manage_cells); INIT_WORK(&net->cells_manager, afs_manage_cells);
timer_setup(&net->cells_timer, afs_cells_timer, 0); timer_setup(&net->cells_timer, afs_cells_timer, 0);
mutex_init(&net->cells_alias_lock);
mutex_init(&net->proc_cells_lock); mutex_init(&net->proc_cells_lock);
INIT_HLIST_HEAD(&net->proc_cells); INIT_HLIST_HEAD(&net->proc_cells);
......
...@@ -38,7 +38,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) ...@@ -38,7 +38,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
if (v == SEQ_START_TOKEN) { if (v == SEQ_START_TOKEN) {
/* display header on line 1 */ /* display header on line 1 */
seq_puts(m, "USE TTL SV NAME\n"); seq_puts(m, "USE TTL SV ST NAME\n");
return 0; return 0;
} }
...@@ -46,10 +46,11 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) ...@@ -46,10 +46,11 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
vllist = rcu_dereference(cell->vl_servers); vllist = rcu_dereference(cell->vl_servers);
/* display one cell per line on subsequent lines */ /* display one cell per line on subsequent lines */
seq_printf(m, "%3u %6lld %2u %s\n", seq_printf(m, "%3u %6lld %2u %2u %s\n",
atomic_read(&cell->usage), atomic_read(&cell->usage),
cell->dns_expiry - ktime_get_real_seconds(), cell->dns_expiry - ktime_get_real_seconds(),
vllist->nr_servers, vllist->nr_servers,
cell->state,
cell->name); cell->name);
return 0; return 0;
} }
......
...@@ -25,7 +25,9 @@ static bool afs_start_fs_iteration(struct afs_operation *op, ...@@ -25,7 +25,9 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
int i; int i;
read_lock(&op->volume->servers_lock); read_lock(&op->volume->servers_lock);
op->server_list = afs_get_serverlist(op->volume->servers); op->server_list = afs_get_serverlist(
rcu_dereference_protected(op->volume->servers,
lockdep_is_held(&op->volume->servers_lock)));
read_unlock(&op->volume->servers_lock); read_unlock(&op->volume->servers_lock);
op->untried = (1UL << op->server_list->nr_servers) - 1; op->untried = (1UL << op->server_list->nr_servers) - 1;
...@@ -173,7 +175,7 @@ bool afs_select_fileserver(struct afs_operation *op) ...@@ -173,7 +175,7 @@ bool afs_select_fileserver(struct afs_operation *op)
/* If the server list didn't change, then assume that /* If the server list didn't change, then assume that
* it's the fileserver having trouble. * it's the fileserver having trouble.
*/ */
if (op->volume->servers == op->server_list) { if (rcu_access_pointer(op->volume->servers) == op->server_list) {
op->error = -EREMOTEIO; op->error = -EREMOTEIO;
goto next_server; goto next_server;
} }
...@@ -263,7 +265,7 @@ bool afs_select_fileserver(struct afs_operation *op) ...@@ -263,7 +265,7 @@ bool afs_select_fileserver(struct afs_operation *op)
* *
* TODO: Retry a few times with sleeps. * TODO: Retry a few times with sleeps.
*/ */
if (op->volume->servers == op->server_list) { if (rcu_access_pointer(op->volume->servers) == op->server_list) {
op->error = -ENOMEDIUM; op->error = -ENOMEDIUM;
goto failed; goto failed;
} }
......
...@@ -352,7 +352,9 @@ static int afs_validate_fc(struct fs_context *fc) ...@@ -352,7 +352,9 @@ static int afs_validate_fc(struct fs_context *fc)
{ {
struct afs_fs_context *ctx = fc->fs_private; struct afs_fs_context *ctx = fc->fs_private;
struct afs_volume *volume; struct afs_volume *volume;
struct afs_cell *cell;
struct key *key; struct key *key;
int ret;
if (!ctx->dyn_root) { if (!ctx->dyn_root) {
if (ctx->no_cell) { if (ctx->no_cell) {
...@@ -365,6 +367,7 @@ static int afs_validate_fc(struct fs_context *fc) ...@@ -365,6 +367,7 @@ static int afs_validate_fc(struct fs_context *fc)
return -EDESTADDRREQ; return -EDESTADDRREQ;
} }
reget_key:
/* We try to do the mount securely. */ /* We try to do the mount securely. */
key = afs_request_key(ctx->cell); key = afs_request_key(ctx->cell);
if (IS_ERR(key)) if (IS_ERR(key))
...@@ -377,6 +380,21 @@ static int afs_validate_fc(struct fs_context *fc) ...@@ -377,6 +380,21 @@ static int afs_validate_fc(struct fs_context *fc)
ctx->volume = NULL; ctx->volume = NULL;
} }
if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) {
ret = afs_cell_detect_alias(ctx->cell, key);
if (ret < 0)
return ret;
if (ret == 1) {
_debug("switch to alias");
key_put(ctx->key);
ctx->key = NULL;
cell = afs_get_cell(ctx->cell->alias_of);
afs_put_cell(ctx->net, ctx->cell);
ctx->cell = cell;
goto reget_key;
}
}
volume = afs_create_volume(ctx); volume = afs_create_volume(ctx);
if (IS_ERR(volume)) if (IS_ERR(volume))
return PTR_ERR(volume); return PTR_ERR(volume);
...@@ -518,7 +536,8 @@ static void afs_kill_super(struct super_block *sb) ...@@ -518,7 +536,8 @@ static void afs_kill_super(struct super_block *sb)
* deactivating the superblock. * deactivating the superblock.
*/ */
if (as->volume) if (as->volume)
afs_clear_callback_interests(net, as->volume->servers); afs_clear_callback_interests(
net, rcu_access_pointer(as->volume->servers));
kill_anon_super(sb); kill_anon_super(sb);
if (as->volume) if (as->volume)
afs_deactivate_volume(as->volume); afs_deactivate_volume(as->volume);
......
// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS cell alias detection
*
* Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/namei.h>
#include <keys/rxrpc-type.h>
#include "internal.h"
/*
* Sample a volume.
*/
static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *key,
const char *name, unsigned int namelen)
{
struct afs_volume *volume;
struct afs_fs_context fc = {
.type = 0, /* Explicitly leave it to the VLDB */
.volnamesz = namelen,
.volname = name,
.net = cell->net,
.cell = cell,
.key = key, /* This might need to be something */
};
volume = afs_create_volume(&fc);
_leave(" = %px", volume);
return volume;
}
/*
 * Compare two transport addresses.
 *
 * Returns a value less than, equal to or greater than zero according to
 * whether the first address orders before, the same as or after the second.
 * Addresses are ordered by transport type, then address family, then address
 * and finally port.  IPv4 addresses and all ports are compared as unsigned
 * host-order values; IPv6 addresses are compared bytewise with memcmp().
 */
static int afs_compare_addrs(const struct sockaddr_rxrpc *srx_a,
			     const struct sockaddr_rxrpc *srx_b)
{
	unsigned short port_a, port_b;
	unsigned int addr_a, addr_b;
	int diff;

	diff = (short)srx_a->transport_type - (short)srx_b->transport_type;
	if (diff)
		goto out;

	/* NOTE(review): transport_type is presumed to carry the socket type
	 * (e.g. SOCK_DGRAM) rather than the address family, so the family is
	 * taken from the transport union instead - confirm against the
	 * definition of struct sockaddr_rxrpc.
	 */
	diff = (int)srx_a->transport.family - (int)srx_b->transport.family;
	if (diff)
		goto out;

	switch (srx_a->transport.family) {
	case AF_INET: {
		const struct sockaddr_in *a = &srx_a->transport.sin;
		const struct sockaddr_in *b = &srx_b->transport.sin;

		addr_a = ntohl(a->sin_addr.s_addr);
		addr_b = ntohl(b->sin_addr.s_addr);
		/* Three-way compare: a signed subtraction could overflow and
		 * would misorder addresses with the top bit set.
		 */
		diff = (addr_a > addr_b) - (addr_a < addr_b);
		if (diff == 0) {
			port_a = ntohs(a->sin_port);
			port_b = ntohs(b->sin_port);
			/* Both promote to int, so this cannot overflow */
			diff = (int)port_a - (int)port_b;
		}
		break;
	}

	case AF_INET6: {
		const struct sockaddr_in6 *a = &srx_a->transport.sin6;
		const struct sockaddr_in6 *b = &srx_b->transport.sin6;

		diff = memcmp(&a->sin6_addr, &b->sin6_addr, 16);
		if (diff == 0) {
			port_a = ntohs(a->sin6_port);
			port_b = ntohs(b->sin6_port);
			diff = (int)port_a - (int)port_b;
		}
		break;
	}

	default:
		BUG();
	}

out:
	return diff;
}
/*
 * Count the addresses that a pair of fileservers have in common.
 *
 * The two address lists are walked in step: whichever side currently holds
 * the lesser address (as judged by afs_compare_addrs()) is advanced, and both
 * sides are advanced when the addresses are equal.  This only finds every
 * match if both lists are sorted consistently with afs_compare_addrs().
 */
static int afs_compare_fs_alists(const struct afs_server *server_a,
				 const struct afs_server *server_b)
{
	const struct afs_addr_list *alist_a = rcu_dereference(server_a->addresses);
	const struct afs_addr_list *alist_b = rcu_dereference(server_b->addresses);
	int ia = 0, ib = 0, nr_common = 0;

	while (ia < alist_a->nr_addrs && ib < alist_b->nr_addrs) {
		int diff = afs_compare_addrs(&alist_a->addrs[ia],
					     &alist_b->addrs[ib]);

		if (diff == 0)
			nr_common++;
		if (diff <= 0)
			ia++;
		if (diff >= 0)
			ib++;
	}

	return nr_common;
}
/*
 * Compare the fileserver lists of two volumes and return the number of
 * fileserver addresses they share.
 *
 * If the vids[] entries of the two server lists differ in any slot, the
 * volumes are treated as unrelated and 0 is returned.  Otherwise the server
 * lists, which are sorted in order of ascending UUID, are walked in parallel
 * and the address lists of each pair of servers with equal UUIDs are compared.
 */
static int afs_compare_volume_slists(const struct afs_volume *vol_a,
				     const struct afs_volume *vol_b)
{
	const struct afs_server_list *slist_a = rcu_dereference(vol_a->servers);
	const struct afs_server_list *slist_b = rcu_dereference(vol_b->servers);
	int type, ia = 0, ib = 0, nr_uuid = 0, nr_addr = 0;

	for (type = 0; type < AFS_MAXTYPES; type++) {
		if (slist_a->vids[type] != slist_b->vids[type])
			return 0;
	}

	while (ia < slist_a->nr_servers && ib < slist_b->nr_servers) {
		const struct afs_server *sa = slist_a->servers[ia].server;
		const struct afs_server *sb = slist_b->servers[ib].server;
		int diff = memcmp(&sa->uuid, &sb->uuid, sizeof(uuid_t));

		if (diff < 0) {
			ia++;
			continue;
		}
		if (diff > 0) {
			ib++;
			continue;
		}

		/* Same UUID on both sides: see if any addresses coincide too */
		nr_uuid++;
		nr_addr += afs_compare_fs_alists(sa, sb);
		ia++;
		ib++;
	}

	_leave(" = %d [um %d]", nr_addr, nr_uuid);
	return nr_addr;
}
/*
 * Compare root.cell volumes.
 *
 * The cell's root volume is compared against the root volume of every other
 * cell on the namespace's proc_cells list, skipping cells that are already
 * marked as aliases and cells that have no root volume.  On the first match,
 * cell->alias_of is set to a reference on the matching cell.
 *
 * Returns 1 if an alias was found and 0 if not.
 */
static int afs_compare_cell_roots(struct afs_cell *cell)
{
	struct afs_cell *p;

	_enter("");

	rcu_read_lock();

	hlist_for_each_entry_rcu(p, &cell->net->proc_cells, proc_link) {
		if (p == cell || p->alias_of)
			continue;
		if (!p->root_volume)
			continue; /* Ignore cells that don't have a root.cell volume. */

		if (afs_compare_volume_slists(cell->root_volume, p->root_volume) != 0)
			goto is_alias;
	}

	rcu_read_unlock();
	_leave(" = 0");
	return 0;

is_alias:
	/* Take the reference before leaving the RCU read-side section: the
	 * list walk is only protected by RCU, so nothing here keeps p around
	 * once rcu_read_unlock() has been called.
	 */
	cell->alias_of = afs_get_cell(p);
	rcu_read_unlock();
	_leave(" = 1");
	return 1;
}
/*
 * Fetch the cell's root.cell volume and, if there is one, attach it to the
 * cell and compare it against the root volumes of the other cells.
 *
 * Returns the result of afs_compare_cell_roots() if the volume was obtained,
 * or the error from the volume lookup if it was not.
 */
static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key)
{
	struct afs_volume *vol;
	int ret;

	_enter("%s", cell->name);

	/* Try and get the root.cell volume for comparison with other cells */
	vol = afs_sample_volume(cell, key, "root.cell", 9);
	if (IS_ERR(vol)) {
		ret = PTR_ERR(vol);
		switch (ret) {
		case -ENOMEDIUM:
			/* Okay, this cell doesn't have a root.cell volume.
			 * We need to locate some other random volume and use
			 * that to check.
			 */
			return -ENOMEDIUM;
		default:
			return ret;
		}
	}

	cell->root_volume = vol;
	return afs_compare_cell_roots(cell);
}
/*
 * Check to see if a new cell is an alias of a cell we already have.  At this
 * point we have the cell's volume server list.
 *
 * Detection is serialised by the namespace's cells_alias_lock.  It is only
 * performed while AFS_CELL_FL_CHECK_ALIAS is set on the cell; the flag is
 * cleared once detection completes without error, and later calls just report
 * whether cell->alias_of is set.
 *
 * Returns 0 if we didn't detect an alias, 1 if we found an alias and an error
 * if we had problems gathering the data required (-ERESTARTSYS if taking the
 * lock was interrupted).  In the case that we did detect an alias,
 * cell->alias_of is set to point to the assumed master.
 */
int afs_cell_detect_alias(struct afs_cell *cell, struct key *key)
{
	struct afs_net *net = cell->net;
	int ret;

	if (mutex_lock_interruptible(&net->cells_alias_lock) < 0)
		return -ERESTARTSYS;

	if (!test_bit(AFS_CELL_FL_CHECK_ALIAS, &cell->flags)) {
		/* Nothing left to check; report what is recorded on the cell */
		ret = cell->alias_of ? 1 : 0;
		goto unlock;
	}

	ret = afs_do_cell_detect_alias(cell, key);
	if (ret >= 0)
		clear_bit_unlock(AFS_CELL_FL_CHECK_ALIAS, &cell->flags);

unlock:
	mutex_unlock(&net->cells_alias_lock);

	if (ret == 1)
		pr_notice("kAFS: Cell %s is an alias of %s\n",
			  cell->name, cell->alias_of->name);
	return ret;
}
...@@ -51,7 +51,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, ...@@ -51,7 +51,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
} }
refcount_set(&slist->usage, 1); refcount_set(&slist->usage, 1);
volume->servers = slist; rcu_assign_pointer(volume->servers, slist);
return volume; return volume;
error_1: error_1:
...@@ -156,7 +156,7 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) ...@@ -156,7 +156,7 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume)
ASSERTCMP(volume->cache, ==, NULL); ASSERTCMP(volume->cache, ==, NULL);
#endif #endif
afs_put_serverlist(net, volume->servers); afs_put_serverlist(net, rcu_access_pointer(volume->servers));
afs_put_cell(net, volume->cell); afs_put_cell(net, volume->cell);
kfree(volume); kfree(volume);
...@@ -256,10 +256,11 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) ...@@ -256,10 +256,11 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
write_lock(&volume->servers_lock); write_lock(&volume->servers_lock);
discard = new; discard = new;
old = volume->servers; old = rcu_dereference_protected(volume->servers,
lockdep_is_held(&volume->servers_lock));
if (afs_annotate_server_list(new, old)) { if (afs_annotate_server_list(new, old)) {
new->seq = volume->servers_seq + 1; new->seq = volume->servers_seq + 1;
volume->servers = new; rcu_assign_pointer(volume->servers, new);
smp_wmb(); smp_wmb();
volume->servers_seq++; volume->servers_seq++;
discard = old; discard = old;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment