Commit f0e834e1 authored by David S. Miller

Merge tag 'rxrpc-next-20181004' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

David Howells says:

====================
rxrpc: Development

Here are some development patches for AF_RXRPC.  The most significant points
are:

 (1) Change the tracepoint that indicates a packet has been transmitted
     into one that indicates a packet is about to be transmitted.  Without
     this, the response tracepoint may occur first if the round trip is
     fast enough.

 (2) Sort out AFS address list handling to better enforce maximum capacity,
     to use helper functions to fill them and to do an insertion sort to
     order them.  This is here to make (3) easier.

 (3) Keep AF_INET addresses as AF_INET addresses rather than converting
     them to AF_INET6 in both AF_RXRPC and kAFS.  I hadn't realised that a
     UDP6 socket would just call down into UDP4 if given an AF_INET
     address.

 (4) Allow the timestamp on the first DATA packet of a reply to be
     retrieved by a kernel service.  This will give kAFS a more
     accurate base from which to calculate the callback promise expiration.

 (5) Allow the rxrpc protocol epoch value to be retrieved from an incoming
     call.  This will allow kAFS to determine if the fileserver restarted
     and if two addresses apparently assigned to the same fileserver
     actually are different boxes.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bbb4c432 e908bcf4
......@@ -1069,6 +1069,31 @@ The kernel interface functions are as follows:
This function may transmit a PING ACK.
(*) Get reply timestamp.
bool rxrpc_kernel_get_reply_time(struct socket *sock,
struct rxrpc_call *call,
ktime_t *_ts)
This allows the timestamp on the first DATA packet of the reply of a
client call to be queried, provided that it is still in the Rx ring. If
successful, the timestamp will be stored into *_ts and true will be
returned; false will be returned otherwise.
(*) Get remote client epoch.
u32 rxrpc_kernel_get_epoch(struct socket *sock,
struct rxrpc_call *call)
This allows the epoch that's contained in packets of an incoming client
call to be queried. This value is returned. The function is always
successful if the call is still in progress. It shouldn't be called once
the call has expired. Note that calling this on a local client call only
returns the local epoch.
This value can be used to determine if the remote client has been
restarted as it shouldn't change otherwise.
=======================
CONFIGURABLE PARAMETERS
......
......@@ -17,11 +17,6 @@
#include "internal.h"
#include "afs_fs.h"
//#define AFS_MAX_ADDRESSES
// ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /
// sizeof(struct sockaddr_rxrpc)))
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
/*
* Release an address list.
*/
......@@ -43,11 +38,15 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
_enter("%u,%u,%u", nr, service, port);
if (nr > AFS_MAX_ADDRESSES)
nr = AFS_MAX_ADDRESSES;
alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL);
if (!alist)
return NULL;
refcount_set(&alist->usage, 1);
alist->max_addrs = nr;
for (i = 0; i < nr; i++) {
struct sockaddr_rxrpc *srx = &alist->addrs[i];
......@@ -109,8 +108,6 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
} while (p < end);
_debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
if (nr > AFS_MAX_ADDRESSES)
nr = AFS_MAX_ADDRESSES;
alist = afs_alloc_addrlist(nr, service, port);
if (!alist)
......@@ -119,8 +116,10 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
/* Extract the addresses */
p = text;
do {
struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
const char *q, *stop;
unsigned int xport = port;
__be32 x[4];
int family;
if (*p == delim) {
p++;
......@@ -136,19 +135,12 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
break;
}
if (in4_pton(p, q - p,
(u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
-1, &stop)) {
srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
} else if (in6_pton(p, q - p,
srx->transport.sin6.sin6_addr.s6_addr,
-1, &stop)) {
/* Nothing to do */
} else {
if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop))
family = AF_INET;
else if (in6_pton(p, q - p, (u8 *)x, -1, &stop))
family = AF_INET6;
else
goto bad_address;
}
if (stop != q)
goto bad_address;
......@@ -160,7 +152,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
if (p < end) {
if (*p == '+') {
/* Port number specification "+1234" */
unsigned int xport = 0;
xport = 0;
p++;
if (p >= end || !isdigit(*p))
goto bad_address;
......@@ -171,7 +163,6 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
goto bad_address;
p++;
} while (p < end && isdigit(*p));
srx->transport.sin6.sin6_port = htons(xport);
} else if (*p == delim) {
p++;
} else {
......@@ -179,8 +170,12 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
}
}
alist->nr_addrs++;
} while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES);
if (family == AF_INET)
afs_merge_fs_addr4(alist, x[0], xport);
else
afs_merge_fs_addr6(alist, x, xport);
} while (p < end);
_leave(" = [nr %u]", alist->nr_addrs);
return alist;
......@@ -237,19 +232,23 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
*/
void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
{
struct sockaddr_in6 *a;
__be16 xport = htons(port);
struct sockaddr_rxrpc *srx;
u32 addr = ntohl(xdr);
int i;
if (alist->nr_addrs >= alist->max_addrs)
return;
for (i = 0; i < alist->nr_ipv4; i++) {
a = &alist->addrs[i].transport.sin6;
if (xdr == a->sin6_addr.s6_addr32[3] &&
xport == a->sin6_port)
struct sockaddr_in *a = &alist->addrs[i].transport.sin;
u32 a_addr = ntohl(a->sin_addr.s_addr);
u16 a_port = ntohs(a->sin_port);
if (addr == a_addr && port == a_port)
return;
if (xdr == a->sin6_addr.s6_addr32[3] &&
(u16 __force)xport < (u16 __force)a->sin6_port)
if (addr == a_addr && port < a_port)
break;
if ((u32 __force)xdr < (u32 __force)a->sin6_addr.s6_addr32[3])
if (addr < a_addr)
break;
}
......@@ -258,12 +257,11 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
alist->addrs + i,
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
a = &alist->addrs[i].transport.sin6;
a->sin6_port = xport;
a->sin6_addr.s6_addr32[0] = 0;
a->sin6_addr.s6_addr32[1] = 0;
a->sin6_addr.s6_addr32[2] = htonl(0xffff);
a->sin6_addr.s6_addr32[3] = xdr;
srx = &alist->addrs[i];
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = htons(port);
srx->transport.sin.sin_addr.s_addr = xdr;
alist->nr_ipv4++;
alist->nr_addrs++;
}
......@@ -273,18 +271,20 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
*/
void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
{
struct sockaddr_in6 *a;
__be16 xport = htons(port);
struct sockaddr_rxrpc *srx;
int i, diff;
if (alist->nr_addrs >= alist->max_addrs)
return;
for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
a = &alist->addrs[i].transport.sin6;
struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6;
u16 a_port = ntohs(a->sin6_port);
diff = memcmp(xdr, &a->sin6_addr, 16);
if (diff == 0 &&
xport == a->sin6_port)
if (diff == 0 && port == a_port)
return;
if (diff == 0 &&
(u16 __force)xport < (u16 __force)a->sin6_port)
if (diff == 0 && port < a_port)
break;
if (diff < 0)
break;
......@@ -295,12 +295,11 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
alist->addrs + i,
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
a = &alist->addrs[i].transport.sin6;
a->sin6_port = xport;
a->sin6_addr.s6_addr32[0] = xdr[0];
a->sin6_addr.s6_addr32[1] = xdr[1];
a->sin6_addr.s6_addr32[2] = xdr[2];
a->sin6_addr.s6_addr32[3] = xdr[3];
srx = &alist->addrs[i];
srx->transport_len = sizeof(srx->transport.sin6);
srx->transport.sin6.sin6_family = AF_INET6;
srx->transport.sin6.sin6_port = htons(port);
memcpy(&srx->transport.sin6.sin6_addr, xdr, 16);
alist->nr_addrs++;
}
......
......@@ -73,12 +73,14 @@ struct afs_addr_list {
struct rcu_head rcu; /* Must be first */
refcount_t usage;
u32 version; /* Version */
unsigned short nr_addrs;
unsigned short index; /* Address currently in use */
unsigned short nr_ipv4; /* Number of IPv4 addresses */
unsigned char max_addrs;
unsigned char nr_addrs;
unsigned char index; /* Address currently in use */
unsigned char nr_ipv4; /* Number of IPv4 addresses */
unsigned long probed; /* Mask of servers that have been probed */
unsigned long yfs; /* Mask of servers that are YFS */
struct sockaddr_rxrpc addrs[];
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
};
/*
......
......@@ -13,6 +13,7 @@
#define _NET_RXRPC_H
#include <linux/rxrpc.h>
#include <linux/ktime.h>
struct key;
struct sock;
......@@ -77,5 +78,8 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
enum rxrpc_call_completion *, u32 *);
u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
#endif /* _NET_RXRPC_H */
......@@ -97,7 +97,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
srx->transport_len > len)
return -EINVAL;
if (srx->transport.family != rx->family)
if (srx->transport.family != rx->family &&
srx->transport.family == AF_INET && rx->family != AF_INET6)
return -EAFNOSUPPORT;
switch (srx->transport.family) {
......@@ -384,6 +385,20 @@ u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
/**
 * rxrpc_kernel_get_epoch - Read the epoch recorded for a call's connection
 * @sock: The socket the call is on (not referenced by this function)
 * @call: The call to query
 *
 * Hand a kernel service the epoch value held in the protocol parameters of
 * the connection that @call is attached to.  For a service call this lets the
 * caller notice that the client at the other end has rebooted.
 *
 * Returns the epoch value; there is no failure return.
 */
u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call)
{
	u32 epoch = call->conn->proto.epoch;

	return epoch;
}
EXPORT_SYMBOL(rxrpc_kernel_get_epoch);
/**
* rxrpc_kernel_check_call - Check a call's state
* @sock: The socket the call is on
......
......@@ -1095,7 +1095,6 @@ void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_purge_queue(struct sk_buff_head *);
/*
......@@ -1112,8 +1111,7 @@ static inline void rxrpc_sysctl_exit(void) {}
/*
* utils.c
*/
int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *,
struct sk_buff *);
int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
static inline bool before(u32 seq1, u32 seq2)
{
......
......@@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
peer = NULL;
if (!peer) {
peer = b->peer_backlog[peer_tail];
if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 0)
if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0)
return NULL;
b->peer_backlog[peer_tail] = NULL;
smp_store_release(&b->peer_backlog_tail,
......
......@@ -86,11 +86,12 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
goto not_found;
/* We may have to handle mixing IPv4 and IPv6 */
if (srx.transport.family != local->srx.transport.family) {
if (srx.transport.family != local->srx.transport.family &&
(srx.transport.family == AF_INET &&
local->srx.transport.family != AF_INET6)) {
pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
srx.transport.family,
local->srx.transport.family);
......
......@@ -1176,7 +1176,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
static int lose;
if ((lose++ & 7) == 7) {
trace_rxrpc_rx_lose(sp);
rxrpc_lose_skb(skb, rxrpc_skb_rx_lost);
rxrpc_free_skb(skb, rxrpc_skb_rx_lost);
return;
}
}
......
......@@ -39,7 +39,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
_enter("");
if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
return;
msg.msg_name = &srx.transport;
......
......@@ -378,11 +378,13 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
if ((lose++ & 7) == 7) {
ret = 0;
lost = true;
goto done;
}
}
_proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
retrans, lost);
if (lost)
goto done;
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
......@@ -415,8 +417,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
goto send_fragmentable;
done:
trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
retrans, lost);
if (ret >= 0) {
if (whdr.flags & RXRPC_REQUEST_ACK) {
call->peer->rtt_last_req = skb->tstamp;
......@@ -561,7 +561,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
continue;
}
if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
msg.msg_namelen = srx.transport_len;
whdr.epoch = htonl(sp->hdr.epoch);
......
......@@ -47,6 +47,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
*/
switch (srx->transport.family) {
case AF_INET:
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
......@@ -70,20 +72,20 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
srx->transport.sin6.sin6_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
_net("Rx ICMP6");
srx->transport.sin6.sin6_port = serr->port;
memcpy(&srx->transport.sin6.sin6_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in6_addr));
break;
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP on v6 sock");
srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12,
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
......
......@@ -715,3 +715,46 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
goto out;
}
EXPORT_SYMBOL(rxrpc_kernel_recv_data);
/**
 * rxrpc_kernel_get_reply_time - Fetch the timestamp of the reply's first packet
 * @sock: The socket that the call exists on (not referenced by this function)
 * @call: The call to query
 * @_ts: Filled in with the timestamp on success; left untouched on failure
 *
 * Look for the first DATA packet of the reply (sequence number 1) in the
 * call's buffer ring and copy out its timestamp.  This only succeeds while
 * the call is in the RXRPC_CALL_CLIENT_RECV_REPLY state, rx_hard_ack is still
 * zero, sequence 1 is not beyond rx_top and the ring slot for it is occupied.
 *
 * The call's user_mutex is held for the duration of the lookup.
 *
 * Returns true if *@_ts was set, false otherwise.
 */
bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call,
				 ktime_t *_ts)
{
	struct sk_buff *first_skb;
	rxrpc_seq_t first_seq, rx_top;
	bool found = false;

	mutex_lock(&call->user_mutex);

	if (READ_ONCE(call->state) == RXRPC_CALL_CLIENT_RECV_REPLY &&
	    call->rx_hard_ack == 0) {
		/* Nothing hard-ACK'd yet, so the packet wanted is hard_ack + 1. */
		first_seq = 1;
		rx_top = smp_load_acquire(&call->rx_top);

		if (!after(first_seq, rx_top)) {
			first_skb = call->rxtx_buffer[first_seq & RXRPC_RXTX_BUFF_MASK];
			if (first_skb) {
				*_ts = skb_get_ktime(first_skb);
				found = true;
			}
		}
	}

	mutex_unlock(&call->user_mutex);
	return found;
}
EXPORT_SYMBOL(rxrpc_kernel_get_reply_time);
......@@ -68,21 +68,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
}
}
/*
 * Account for a socket buffer that is being dropped by injected packet loss:
 * decrement the counter chosen by select_skb_count() for @op, emit the
 * rxrpc_skb tracepoint tagged with our caller's return address, then release
 * the buffer with kfree_skb().  A NULL @skb is ignored.
 */
void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
	const void *caller = __builtin_return_address(0);
	int remaining;

	if (!skb)
		return;

	CHECK_SLAB_OKAY(&skb->users);
	remaining = atomic_dec_return(select_skb_count(op));
	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), remaining, caller);
	kfree_skb(skb);
}
/*
* Clear a queue of socket buffers.
*/
......
......@@ -17,28 +17,17 @@
/*
* Fill out a peer address from a socket buffer containing a packet.
*/
int rxrpc_extract_addr_from_skb(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx,
struct sk_buff *skb)
int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
{
memset(srx, 0, sizeof(*srx));
switch (ntohs(skb->protocol)) {
case ETH_P_IP:
if (local->srx.transport.family == AF_INET6) {
srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin6);
srx->transport.sin6.sin6_family = AF_INET6;
srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
} else {
srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = udp_hdr(skb)->source;
srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
}
srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = udp_hdr(skb)->source;
srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
return 0;
#ifdef CONFIG_AF_RXRPC_IPV6
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment