Commit e7506d34 authored by David S. Miller's avatar David S. Miller

Merge tag 'rxrpc-fixes-20220901' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

David Howells says:

====================
rxrpc fixes
Here are some fixes for AF_RXRPC:

 (1) Fix the handling of ICMP/ICMP6 packets.  This is a problem due to
     rxrpc being switched to acting as a UDP tunnel, thereby allowing it to
     steal the packets before they go through the UDP Rx queue.  UDP
     tunnels can't get ICMP/ICMP6 packets, however.  This patch adds an
     additional encap hook so that they can.

 (2) Fix the encryption routines in rxkad to handle packets that have more
     than three parts correctly.  The problem is that ->nr_frags doesn't
     count the initial fragment, so the sglist ends up too short.

 (3) Fix a problem with destruction of the local endpoint potentially
     getting repeated.

 (4) Fix the calculation of the time at which to resend.
     jiffies_to_usecs() gives microseconds, not nanoseconds.

 (5) Fix AFS to work out when callback promises and locks expire based on
     the time an op was issued rather than the time the first reply packet
     arrives.  We don't know how long the server took between calculating
     the expiry interval and transmitting the reply.

 (6) Given (5), rxrpc_get_reply_time() is no longer used, so remove it.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 32614006 21457f4a
......@@ -1055,17 +1055,6 @@ The kernel interface functions are as follows:
first function to change. Note that this must be called in TASK_RUNNING
state.
(#) Get reply timestamp::
bool rxrpc_kernel_get_reply_time(struct socket *sock,
struct rxrpc_call *call,
ktime_t *_ts)
This allows the timestamp on the first DATA packet of the reply of a
client call to be queried, provided that it is still in the Rx ring. If
successful, the timestamp will be stored into ``*_ts`` and true will be
returned; false will be returned otherwise.
(#) Get remote client epoch::
u32 rxrpc_kernel_get_epoch(struct socket *sock,
......
......@@ -76,7 +76,7 @@ void afs_lock_op_done(struct afs_call *call)
if (call->error == 0) {
spin_lock(&vnode->lock);
trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0);
vnode->locked_at = call->reply_time;
vnode->locked_at = call->issue_time;
afs_schedule_lock_extension(vnode);
spin_unlock(&vnode->lock);
}
......
......@@ -131,7 +131,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
{
return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry;
return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry;
}
static void xdr_decode_AFSCallBack(const __be32 **_bp,
......
......@@ -137,7 +137,6 @@ struct afs_call {
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool upgrade; /* T to request service upgrade */
bool have_reply_time; /* T if have got reply_time */
bool intr; /* T if interruptible */
bool unmarshalling_error; /* T if an unmarshalling error occurred */
u16 service_id; /* Actual service ID (after upgrade) */
......@@ -151,7 +150,7 @@ struct afs_call {
} __attribute__((packed));
__be64 tmp64;
};
ktime_t reply_time; /* Time of first reply packet */
ktime_t issue_time; /* Time of issue of operation */
};
struct afs_call_type {
......
......@@ -351,6 +351,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
if (call->max_lifespan)
rxrpc_kernel_set_max_life(call->net->socket, rxcall,
call->max_lifespan);
call->issue_time = ktime_get_real();
/* send the request */
iov[0].iov_base = call->request;
......@@ -501,12 +502,6 @@ static void afs_deliver_to_call(struct afs_call *call)
return;
}
if (!call->have_reply_time &&
rxrpc_kernel_get_reply_time(call->net->socket,
call->rxcall,
&call->reply_time))
call->have_reply_time = true;
ret = call->type->deliver(call);
state = READ_ONCE(call->state);
if (ret == 0 && call->unmarshalling_error)
......
......@@ -232,8 +232,7 @@ static void xdr_decode_YFSCallBack(const __be32 **_bp,
struct afs_callback *cb = &scb->callback;
ktime_t cb_expiry;
cb_expiry = call->reply_time;
cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100);
cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100);
cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC);
scb->have_cb = true;
*_bp += xdr_size(x);
......
......@@ -70,6 +70,7 @@ struct udp_sock {
* For encapsulation sockets.
*/
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset);
int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
void (*encap_destroy)(struct sock *sk);
......
......@@ -66,8 +66,6 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
unsigned long);
......
......@@ -67,6 +67,9 @@ static inline int udp_sock_create(struct net *net,
typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk,
struct sk_buff *skb);
typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk,
struct sk_buff *skb,
unsigned int udp_offset);
typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
struct list_head *head,
......@@ -80,6 +83,7 @@ struct udp_tunnel_sock_cfg {
__u8 encap_type;
udp_tunnel_encap_rcv_t encap_rcv;
udp_tunnel_encap_err_lookup_t encap_err_lookup;
udp_tunnel_encap_err_rcv_t encap_err_rcv;
udp_tunnel_encap_destroy_t encap_destroy;
udp_tunnel_gro_receive_t gro_receive;
udp_tunnel_gro_complete_t gro_complete;
......
......@@ -783,6 +783,8 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
*/
if (tunnel) {
/* ...not for tunnels though: we don't have a sending socket */
if (udp_sk(sk)->encap_err_rcv)
udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2);
goto out;
}
if (!inet->recverr) {
......
......@@ -72,6 +72,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_sk(sk)->encap_type = cfg->encap_type;
udp_sk(sk)->encap_rcv = cfg->encap_rcv;
udp_sk(sk)->encap_err_rcv = cfg->encap_err_rcv;
udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
udp_sk(sk)->encap_destroy = cfg->encap_destroy;
udp_sk(sk)->gro_receive = cfg->gro_receive;
......
......@@ -616,8 +616,11 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
/* Tunnels don't have an application socket: don't pass errors back */
if (tunnel)
if (tunnel) {
if (udp_sk(sk)->encap_err_rcv)
udp_sk(sk)->encap_err_rcv(sk, skb, offset);
goto out;
}
if (!np->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
......
......@@ -982,6 +982,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *);
/*
* peer_event.c
*/
void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset);
void rxrpc_error_report(struct sock *);
void rxrpc_peer_keepalive_worker(struct work_struct *);
......
......@@ -166,7 +166,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
now = ktime_get_real();
max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j));
max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
spin_lock_bh(&call->lock);
......
......@@ -137,6 +137,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
tuncfg.encap_type = UDP_ENCAP_RXRPC;
tuncfg.encap_rcv = rxrpc_input_packet;
tuncfg.encap_err_rcv = rxrpc_encap_err_rcv;
tuncfg.sk_user_data = local;
setup_udp_tunnel_sock(net, local->socket, &tuncfg);
......@@ -405,6 +406,9 @@ static void rxrpc_local_processor(struct work_struct *work)
container_of(work, struct rxrpc_local, processor);
bool again;
if (local->dead)
return;
trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
refcount_read(&local->ref), NULL);
......
......@@ -16,22 +16,105 @@
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include <net/icmp.h>
#include "ar-internal.h"
static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int);
static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
static void rxrpc_distribute_error(struct rxrpc_peer *, int,
enum rxrpc_call_completion);
/*
* Find the peer associated with an ICMP packet.
* Find the peer associated with an ICMPv4 packet.
*/
static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
const struct sk_buff *skb,
struct sk_buff *skb,
unsigned int udp_offset,
unsigned int *info,
struct sockaddr_rxrpc *srx)
{
struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
struct iphdr *ip, *ip0 = ip_hdr(skb);
struct icmphdr *icmp = icmp_hdr(skb);
struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
_enter("");
_enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code);
switch (icmp->type) {
case ICMP_DEST_UNREACH:
*info = ntohs(icmp->un.frag.mtu);
fallthrough;
case ICMP_TIME_EXCEEDED:
case ICMP_PARAMETERPROB:
ip = (struct iphdr *)((void *)icmp + 8);
break;
default:
return NULL;
}
memset(srx, 0, sizeof(*srx));
srx->transport_type = local->srx.transport_type;
srx->transport_len = local->srx.transport_len;
srx->transport.family = local->srx.transport.family;
/* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
* versa?
*/
switch (srx->transport.family) {
case AF_INET:
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = udp->dest;
memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
sizeof(struct in_addr));
break;
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = udp->dest;
memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
sizeof(struct in_addr));
break;
#endif
default:
WARN_ON_ONCE(1);
return NULL;
}
_net("ICMP {%pISp}", &srx->transport);
return rxrpc_lookup_peer_rcu(local, srx);
}
#ifdef CONFIG_AF_RXRPC_IPV6
/*
* Find the peer associated with an ICMPv6 packet.
*/
static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local,
struct sk_buff *skb,
unsigned int udp_offset,
unsigned int *info,
struct sockaddr_rxrpc *srx)
{
struct icmp6hdr *icmp = icmp6_hdr(skb);
struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb);
struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
_enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code);
switch (icmp->icmp6_type) {
case ICMPV6_DEST_UNREACH:
*info = ntohl(icmp->icmp6_mtu);
fallthrough;
case ICMPV6_PKT_TOOBIG:
case ICMPV6_TIME_EXCEED:
case ICMPV6_PARAMPROB:
ip = (struct ipv6hdr *)((void *)icmp + 8);
break;
default:
return NULL;
}
memset(srx, 0, sizeof(*srx));
srx->transport_type = local->srx.transport_type;
......@@ -41,6 +124,165 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
/* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
* versa?
*/
switch (srx->transport.family) {
case AF_INET:
_net("Rx ICMP6 on v4 sock");
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.family = AF_INET;
srx->transport.sin.sin_port = udp->dest;
memcpy(&srx->transport.sin.sin_addr,
&ip->daddr.s6_addr32[3], sizeof(struct in_addr));
break;
case AF_INET6:
_net("Rx ICMP6");
srx->transport.sin.sin_port = udp->dest;
memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr,
sizeof(struct in6_addr));
break;
default:
WARN_ON_ONCE(1);
return NULL;
}
_net("ICMP {%pISp}", &srx->transport);
return rxrpc_lookup_peer_rcu(local, srx);
}
#endif /* CONFIG_AF_RXRPC_IPV6 */
/*
* Handle an error received on the local endpoint as a tunnel.
*/
void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
unsigned int udp_offset)
{
struct sock_extended_err ee;
struct sockaddr_rxrpc srx;
struct rxrpc_local *local;
struct rxrpc_peer *peer;
unsigned int info = 0;
int err;
u8 version = ip_hdr(skb)->version;
u8 type = icmp_hdr(skb)->type;
u8 code = icmp_hdr(skb)->code;
rcu_read_lock();
local = rcu_dereference_sk_user_data(sk);
if (unlikely(!local)) {
rcu_read_unlock();
return;
}
rxrpc_new_skb(skb, rxrpc_skb_received);
switch (ip_hdr(skb)->version) {
case IPVERSION:
peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset,
&info, &srx);
break;
#ifdef CONFIG_AF_RXRPC_IPV6
case 6:
peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset,
&info, &srx);
break;
#endif
default:
rcu_read_unlock();
return;
}
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
if (!peer) {
rcu_read_unlock();
return;
}
memset(&ee, 0, sizeof(ee));
switch (version) {
case IPVERSION:
switch (type) {
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_FRAG_NEEDED:
rxrpc_adjust_mtu(peer, info);
rcu_read_unlock();
rxrpc_put_peer(peer);
return;
default:
break;
}
err = EHOSTUNREACH;
if (code <= NR_ICMP_UNREACH) {
/* Might want to do something different with
* non-fatal errors
*/
//harderr = icmp_err_convert[code].fatal;
err = icmp_err_convert[code].errno;
}
break;
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
break;
default:
err = EPROTO;
break;
}
ee.ee_origin = SO_EE_ORIGIN_ICMP;
ee.ee_type = type;
ee.ee_code = code;
ee.ee_errno = err;
break;
#ifdef CONFIG_AF_RXRPC_IPV6
case 6:
switch (type) {
case ICMPV6_PKT_TOOBIG:
rxrpc_adjust_mtu(peer, info);
rcu_read_unlock();
rxrpc_put_peer(peer);
return;
}
icmpv6_err_convert(type, code, &err);
if (err == EACCES)
err = EHOSTUNREACH;
ee.ee_origin = SO_EE_ORIGIN_ICMP6;
ee.ee_type = type;
ee.ee_code = code;
ee.ee_errno = err;
break;
#endif
}
trace_rxrpc_rx_icmp(peer, &ee, &srx);
rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR);
rcu_read_unlock();
rxrpc_put_peer(peer);
}
/*
* Find the peer associated with a local error.
*/
static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
const struct sk_buff *skb,
struct sockaddr_rxrpc *srx)
{
struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
_enter("");
memset(srx, 0, sizeof(*srx));
srx->transport_type = local->srx.transport_type;
srx->transport_len = local->srx.transport_len;
srx->transport.family = local->srx.transport.family;
switch (srx->transport.family) {
case AF_INET:
srx->transport_len = sizeof(srx->transport.sin);
......@@ -104,10 +346,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
/*
* Handle an MTU/fragmentation problem.
*/
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr)
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
u32 mtu = serr->ee.ee_info;
_net("Rx ICMP Fragmentation Needed (%d)", mtu);
/* wind down the local interface MTU */
......@@ -148,7 +388,7 @@ void rxrpc_error_report(struct sock *sk)
struct sock_exterr_skb *serr;
struct sockaddr_rxrpc srx;
struct rxrpc_local *local;
struct rxrpc_peer *peer;
struct rxrpc_peer *peer = NULL;
struct sk_buff *skb;
rcu_read_lock();
......@@ -172,41 +412,20 @@ void rxrpc_error_report(struct sock *sk)
}
rxrpc_new_skb(skb, rxrpc_skb_received);
serr = SKB_EXT_ERR(skb);
if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
_leave("UDP empty message");
rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_freed);
return;
}
peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
if (!peer) {
rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_freed);
_leave(" [no peer]");
return;
}
trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
serr->ee.ee_type == ICMP_DEST_UNREACH &&
serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
rxrpc_adjust_mtu(peer, serr);
rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_freed);
rxrpc_put_peer(peer);
_leave(" [MTU update]");
return;
if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) {
peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
if (peer) {
trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
rxrpc_store_error(peer, serr);
}
}
rxrpc_store_error(peer, serr);
rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_freed);
rxrpc_put_peer(peer);
_leave("");
}
......
......@@ -771,46 +771,3 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
goto out;
}
EXPORT_SYMBOL(rxrpc_kernel_recv_data);
/**
* rxrpc_kernel_get_reply_time - Get timestamp on first reply packet
* @sock: The socket that the call exists on
* @call: The call to query
* @_ts: Where to put the timestamp
*
* Retrieve the timestamp from the first DATA packet of the reply if it is
* in the ring. Returns true if successful, false if not.
*/
bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call,
ktime_t *_ts)
{
struct sk_buff *skb;
rxrpc_seq_t hard_ack, top, seq;
bool success = false;
mutex_lock(&call->user_mutex);
if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY)
goto out;
hard_ack = call->rx_hard_ack;
if (hard_ack != 0)
goto out;
seq = hard_ack + 1;
top = smp_load_acquire(&call->rx_top);
if (after(seq, top))
goto out;
skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK];
if (!skb)
goto out;
*_ts = skb_get_ktime(skb);
success = true;
out:
mutex_unlock(&call->user_mutex);
return success;
}
EXPORT_SYMBOL(rxrpc_kernel_get_reply_time);
......@@ -540,7 +540,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
* directly into the target buffer.
*/
sg = _sg;
nsg = skb_shinfo(skb)->nr_frags;
nsg = skb_shinfo(skb)->nr_frags + 1;
if (nsg <= 4) {
nsg = 4;
} else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment