Commit f953d33b authored by Jakub Kicinski, committed by David S. Miller

net/tls: add kernel-driven TLS RX resync

TLS offload device may lose sync with the TCP stream if packets
arrive out of order.  Drivers can currently request a resync at
a specific TCP sequence number.  When a record is found starting
at that sequence number, the kernel will inform the device of the
corresponding record number.

This requires the device to constantly scan the stream for a
known pattern (constant bytes of the header) after sync is lost.

This patch adds an alternative approach which is entirely under
the control of the kernel.  Kernel tracks records it had to fully
decrypt, even though TLS socket is in TLS_HW mode.  If multiple
records did not have any decrypted parts - it's a pretty strong
indication that the device is out of sync.

We choose the min number of fully encrypted records to be 2,
which should hopefully be more than will get retransmitted at
a time.

After kernel decides the device is out of sync it schedules a
resync request.  If the TCP socket is empty the resync gets
performed immediately.  If socket is not empty we leave the
record parser to resync when next record comes.

Before resync in message parser we peek at the TCP socket and
don't attempt the sync if the socket already has some of the
next record queued.

On resync failure (encrypted data continues to flow in) we
retry with exponential backoff, up to once every 128 records
(with a 16k record that's at most once every 2M of data).
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent fe58a5a0
......@@ -268,6 +268,9 @@ Device can only detect that segment 4 also contains a TLS header
if it knows the length of the previous record from segment 2. In this case
the device will lose synchronization with the stream.
Stream scan resynchronization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When the device gets out of sync and the stream reaches TCP sequence
numbers more than a max size record past the expected TCP sequence number,
the device starts scanning for a known header pattern. For example
......@@ -298,6 +301,22 @@ Special care has to be taken if the confirmation request is passed
asynchronously to the packet stream and record may get processed
by the kernel before the confirmation request.
Stack-driven resynchronization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The driver may also request the stack to perform resynchronization
whenever it sees the records are no longer getting decrypted.
If the connection is configured in this mode the stack automatically
schedules resynchronization after it has received two completely encrypted
records.
The stack waits for the socket to drain and informs the device about
the next expected record number and its TCP sequence number. If the
records continue to be received fully encrypted, the stack retries the
synchronization with an exponential back off (first after 2 encrypted
records, then after 4 records, after 8, after 16... up until every
128 records).
Error handling
==============
......
......@@ -303,10 +303,33 @@ struct tlsdev_ops {
struct sock *sk, u32 seq, u8 *rcd_sn);
};
/* Selects which mechanism is used to resynchronize the RX offload of a
 * connection; stored in tls_offload_context_rx.resync_type.
 */
enum tls_offload_sync_type {
/* resync is performed at a TCP sequence number requested by the driver */
TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ = 0,
/* resync is scheduled by the core after fully encrypted records are seen */
TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT = 1,
};
/* Starting value of resync_nh.decrypted_tgt, the count of fully encrypted
 * records that must be exceeded before a core-driven resync is attempted.
 */
#define TLS_DEVICE_RESYNC_NH_START_IVAL 2
/* Once resync_nh.decrypted_tgt reaches this value it is no longer doubled
 * after an attempt; it is advanced by this many records instead.
 */
#define TLS_DEVICE_RESYNC_NH_MAX_IVAL 128
struct tls_offload_context_rx {
/* sw must be the first member of tls_offload_context_rx */
struct tls_sw_context_rx sw;
enum tls_offload_sync_type resync_type;
/* this member is set regardless of resync_type, to avoid branches */
u8 resync_nh_reset:1;
/* CORE_NEXT_HINT-only member, but use the hole here */
u8 resync_nh_do_now:1;
union {
/* TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ */
struct {
atomic64_t resync_req;
};
/* TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT */
struct {
u32 decrypted_failed;
u32 decrypted_tgt;
} resync_nh;
};
u8 driver_state[] __aligned(8);
/* The TLS layer reserves room for driver specific state
* Currently the belief is that there is not enough
......@@ -587,6 +610,13 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1);
}
/* Record @type as the resync mechanism in the RX offload context that
 * belongs to the TLS context of @sk.
 */
static inline void
tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type)
{
	tls_offload_ctx_rx(tls_get_ctx(sk))->resync_type = type;
}
int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
unsigned char *record_type);
......@@ -608,6 +638,6 @@ int tls_sw_fallback_init(struct sock *sk,
int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
void tls_device_offload_cleanup_rx(struct sock *sk);
void tls_device_rx_resync_new_rec(struct sock *sk, u32 seq);
void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
#endif /* _TLS_OFFLOAD_H */
......@@ -563,10 +563,12 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
}
void tls_device_rx_resync_new_rec(struct sock *sk, u32 seq)
void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx;
u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
struct tls_prot_info *prot;
u32 is_req_pending;
s64 resync_req;
u32 req_seq;
......@@ -574,15 +576,84 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 seq)
if (tls_ctx->rx_conf != TLS_HW)
return;
prot = &tls_ctx->prot_info;
rx_ctx = tls_offload_ctx_rx(tls_ctx);
memcpy(rcd_sn, tls_ctx->rx.rec_seq, prot->rec_seq_size);
switch (rx_ctx->resync_type) {
case TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ:
resync_req = atomic64_read(&rx_ctx->resync_req);
req_seq = resync_req >> 32;
seq += TLS_HEADER_SIZE - 1;
is_req_pending = resync_req;
if (unlikely(is_req_pending) && req_seq == seq &&
atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
tls_device_resync_rx(tls_ctx, sk, seq, tls_ctx->rx.rec_seq);
if (likely(!is_req_pending) || req_seq != seq ||
!atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
return;
break;
case TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT:
if (likely(!rx_ctx->resync_nh_do_now))
return;
/* head of next rec is already in, note that the sock_inq will
* include the currently parsed message when called from parser
*/
if (tcp_inq(sk) > rcd_len)
return;
rx_ctx->resync_nh_do_now = 0;
seq += rcd_len;
tls_bigint_increment(rcd_sn, prot->rec_seq_size);
break;
}
tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn);
}
/* Account for a record that was received entirely encrypted and, when the
 * context uses TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT, either perform a resync
 * right away or flag one to be done later via resync_nh_do_now.
 *
 * @tls_ctx: TLS context of the socket
 * @ctx:     RX offload context holding the resync state
 * @sk:      socket the record was received on
 * @skb:     the record; its strparser message supplies the full length
 */
static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
					   struct tls_offload_context_rx *ctx,
					   struct sock *sk, struct sk_buff *skb)
{
	u8 next_sn[TLS_MAX_REC_SEQ_SIZE];
	struct tls_prot_info *prot_info;
	struct strp_msg *msg;

	/* In any other mode the device requests resyncs by itself based on
	 * stream scan; there is also nothing to do when a resync has
	 * already been scheduled.
	 */
	if (ctx->resync_type != TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT ||
	    ctx->resync_nh_do_now)
		return;

	/* Decrypted fragments were seen since the last fully-failed record:
	 * restart the count and the back-off target.
	 */
	if (ctx->resync_nh_reset) {
		ctx->resync_nh.decrypted_tgt = TLS_DEVICE_RESYNC_NH_START_IVAL;
		ctx->resync_nh.decrypted_failed = 1;
		ctx->resync_nh_reset = 0;
		return;
	}

	ctx->resync_nh.decrypted_failed++;
	if (ctx->resync_nh.decrypted_failed <= ctx->resync_nh.decrypted_tgt)
		return;

	/* A resync is due; move the next target out in case this one fails */
	if (ctx->resync_nh.decrypted_tgt >= TLS_DEVICE_RESYNC_NH_MAX_IVAL)
		ctx->resync_nh.decrypted_tgt += TLS_DEVICE_RESYNC_NH_MAX_IVAL;
	else
		ctx->resync_nh.decrypted_tgt *= 2;

	msg = strp_msg(skb);

	/* Head of the next record is already in, the parser will sync for us */
	if (tcp_inq(sk) > msg->full_len) {
		ctx->resync_nh_do_now = 1;
		return;
	}

	/* Nothing further is queued: resync now, using the record number
	 * that follows the current one.
	 */
	prot_info = &tls_ctx->prot_info;
	memcpy(next_sn, tls_ctx->rx.rec_seq, prot_info->rec_seq_size);
	tls_bigint_increment(next_sn, prot_info->rec_seq_size);

	tls_device_resync_rx(tls_ctx, sk, tcp_sk(sk)->copied_seq, next_sn);
}
static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
......@@ -686,12 +757,21 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
ctx->sw.decrypted |= is_decrypted;
/* Return immedeatly if the record is either entirely plaintext or
/* Return immediately if the record is either entirely plaintext or
* entirely ciphertext. Otherwise handle reencrypt partially decrypted
* record.
*/
return (is_encrypted || is_decrypted) ? 0 :
tls_device_reencrypt(sk, skb);
if (is_decrypted) {
ctx->resync_nh_reset = 1;
return 0;
}
if (is_encrypted) {
tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb);
return 0;
}
ctx->resync_nh_reset = 1;
return tls_device_reencrypt(sk, skb);
}
static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
......@@ -917,6 +997,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
rc = -ENOMEM;
goto release_netdev;
}
context->resync_nh_reset = 1;
ctx->priv_ctx_rx = context;
rc = tls_set_sw_offload(sk, ctx, 0);
......
......@@ -2015,7 +2015,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
goto read_failure;
}
#ifdef CONFIG_TLS_DEVICE
tls_device_rx_resync_new_rec(strp->sk,
tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE,
TCP_SKB_CB(skb)->seq + rxm->offset);
#endif
return data_len + TLS_HEADER_SIZE;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment