Commit 6703a605 authored by David S. Miller

Merge branch 'net-tls-small-TX-offload-optimizations'

Jakub Kicinski says:

====================
net/tls: small TX offload optimizations

This set brings small TLS TX device optimizations. The biggest
gain comes from fixing a misuse of non-temporal copy instructions.
On a synthetic workload modelled after a customer's RPC application
I see a 3-5% gain.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fcd8c627 e681cc60
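
The headline change is in the data copy path: instead of issuing non-temporal
(cache-bypassing) copies for the whole buffer regardless of alignment, the new
tls_device_copy_data() below does a cached copy up to the first cache-line
boundary, a non-temporal copy of the whole cache lines, and a cached copy of
the tail. A minimal user-space sketch of the same split (CACHE_LINE and
nt_memcpy() are assumptions; nt_memcpy() stands in for copy_from_iter_nocache()
and plain memcpy() for copy_from_iter()):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define CACHE_LINE 64   /* assumption: cache-line size, SMP_CACHE_BYTES in-kernel */

    /* hypothetical stand-in for a non-temporal (cache-bypassing) copy */
    static void nt_memcpy(void *dst, const void *src, size_t n)
    {
            memcpy(dst, src, n);
    }

    static void aligned_nt_copy(void *dst, const void *src, size_t bytes)
    {
            unsigned char *d = dst;
            const unsigned char *s = src;
            /* cached head: bytes until d hits a cache-line boundary */
            size_t head = -(uintptr_t)d & (CACHE_LINE - 1);
            size_t bulk;

            if (head) {
                    head = head < bytes ? head : bytes;
                    memcpy(d, s, head);
                    d += head;
                    s += head;
                    bytes -= head;
            }

            /* non-temporal bulk: whole, aligned cache lines only */
            bulk = bytes & ~(size_t)(CACHE_LINE - 1);
            nt_memcpy(d, s, bulk);
            d += bulk;
            s += bulk;
            bytes -= bulk;

            /* cached tail: the trailing partial line */
            if (bytes)
                    memcpy(d, s, bytes);
    }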
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -122,13 +122,10 @@ static struct net_device *get_netdev_for_sock(struct sock *sk)
 
 static void destroy_record(struct tls_record_info *record)
 {
-	int nr_frags = record->num_frags;
-	skb_frag_t *frag;
+	int i;
 
-	while (nr_frags-- > 0) {
-		frag = &record->frags[nr_frags];
-		__skb_frag_unref(frag);
-	}
+	for (i = 0; i < record->num_frags; i++)
+		__skb_frag_unref(&record->frags[i]);
 	kfree(record);
 }
@@ -259,33 +256,15 @@ static int tls_push_record(struct sock *sk,
 			   struct tls_context *ctx,
 			   struct tls_offload_context_tx *offload_ctx,
 			   struct tls_record_info *record,
-			   struct page_frag *pfrag,
-			   int flags,
-			   unsigned char record_type)
+			   int flags)
 {
 	struct tls_prot_info *prot = &ctx->prot_info;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct page_frag dummy_tag_frag;
 	skb_frag_t *frag;
 	int i;
 
-	/* fill prepend */
-	frag = &record->frags[0];
-	tls_fill_prepend(ctx,
-			 skb_frag_address(frag),
-			 record->len - prot->prepend_size,
-			 record_type,
-			 prot->version);
-
-	/* HW doesn't care about the data in the tag, because it fills it. */
-	dummy_tag_frag.page = skb_frag_page(frag);
-	dummy_tag_frag.offset = 0;
-	tls_append_frag(record, &dummy_tag_frag, prot->tag_size);
-
 	record->end_seq = tp->write_seq + record->len;
-	spin_lock_irq(&offload_ctx->lock);
-	list_add_tail(&record->list, &offload_ctx->records_list);
-	spin_unlock_irq(&offload_ctx->lock);
+	list_add_tail_rcu(&record->list, &offload_ctx->records_list);
 	offload_ctx->open_record = NULL;
 
 	if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags))
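
Dropping spin_lock_irq() around the list addition works because additions are
already serialized by the socket lock; only the lockless reader in
tls_get_record() (the last two hunks) needs RCU. The general writer/reader
pairing for RCU-protected lists (a generic sketch of the rculist API, not code
from this series):

    /* writer side: additions serialized externally, here by the socket lock */
    list_add_tail_rcu(&rec->list, &ctx->records_list);

    /* reader side, e.g. a concurrent transmit path */
    rcu_read_lock();
    list_for_each_entry_rcu(rec, &ctx->records_list, list) {
            /* each entry is seen fully initialized or not at all */
    }
    rcu_read_unlock();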
@@ -307,6 +286,38 @@ static int tls_push_record(struct sock *sk,
 	return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
+static int tls_device_record_close(struct sock *sk,
+				   struct tls_context *ctx,
+				   struct tls_record_info *record,
+				   struct page_frag *pfrag,
+				   unsigned char record_type)
+{
+	struct tls_prot_info *prot = &ctx->prot_info;
+	int ret;
+
+	/* append tag
+	 * device will fill in the tag, we just need to append a placeholder
+	 * use socket memory to improve coalescing (re-using a single buffer
+	 * increases frag count)
+	 * if we can't allocate memory now, steal some back from data
+	 */
+	if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
+					sk->sk_allocation))) {
+		ret = 0;
+		tls_append_frag(record, pfrag, prot->tag_size);
+	} else {
+		ret = prot->tag_size;
+		if (record->len <= prot->overhead_size)
+			return -ENOMEM;
+	}
+
+	/* fill prepend */
+	tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
+			 record->len - prot->overhead_size,
+			 record_type, prot->version);
+	return ret;
+}
+
 static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
 				 struct page_frag *pfrag,
 				 size_t prepend_size)
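
tls_device_record_close() only reserves room for the authentication tag; the
device overwrites the placeholder with the real tag on transmission. The
placeholder is carved out of the socket's page frag so consecutive records can
share one page instead of each adding a frag. On allocation failure the helper
returns tag_size, telling the caller (see the tls_push_data() hunk below) that
the tag stole room from the payload; a record at or below overhead_size has no
payload to donate, hence -ENOMEM. The socket page-frag pattern itself looks
roughly like this (a generic sketch, not code from this series):

    /* make sure pfrag has at least size bytes free, reusing one page */
    if (skb_page_frag_refill(size, pfrag, sk->sk_allocation)) {
            void *buf = page_address(pfrag->page) + pfrag->offset;

            /* ... reference the page and fill buf ... */
            pfrag->offset += size;  /* consume; the page is reused until full */
    }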
@@ -361,6 +372,31 @@ static int tls_do_allocation(struct sock *sk,
 	return 0;
 }
 
+static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
+{
+	size_t pre_copy, nocache;
+
+	pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1);
+	if (pre_copy) {
+		pre_copy = min(pre_copy, bytes);
+		if (copy_from_iter(addr, pre_copy, i) != pre_copy)
+			return -EFAULT;
+		bytes -= pre_copy;
+		addr += pre_copy;
+	}
+
+	nocache = round_down(bytes, SMP_CACHE_BYTES);
+	if (copy_from_iter_nocache(addr, nocache, i) != nocache)
+		return -EFAULT;
+	bytes -= nocache;
+	addr += nocache;
+
+	if (bytes && copy_from_iter(addr, bytes, i) != bytes)
+		return -EFAULT;
+
+	return 0;
+}
+
 static int tls_push_data(struct sock *sk,
 			 struct iov_iter *msg_iter,
 			 size_t size, int flags,
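
The pre_copy expression is the usual distance-to-the-next-boundary trick: in
two's complement ~(addr - 1) equals -addr, so masking with SMP_CACHE_BYTES - 1
gives the number of bytes left before the next cache-line boundary, and zero
when addr is already aligned. A quick user-space check of the identity
(assuming 64-byte cache lines):

    #include <assert.h>
    #include <stdio.h>

    #define SMP_CACHE_BYTES 64  /* assumption: 64-byte cache lines */

    int main(void)
    {
            unsigned long addr;

            for (addr = 1; addr < 4 * SMP_CACHE_BYTES; addr++) {
                    unsigned long pre_copy = ~(addr - 1) & (SMP_CACHE_BYTES - 1);

                    /* same value, written as "negate and mask"... */
                    assert(pre_copy == (-addr & (SMP_CACHE_BYTES - 1)));
                    /* ...and pre_copy bytes later we sit exactly on a boundary */
                    assert((addr + pre_copy) % SMP_CACHE_BYTES == 0);
            }
            printf("pre_copy identity holds\n");
            return 0;
    }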
@@ -434,12 +470,10 @@ static int tls_push_data(struct sock *sk,
 		copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
 		copy = min_t(size_t, copy, (max_open_record_len - record->len));
 
-		if (copy_from_iter_nocache(page_address(pfrag->page) +
-					   pfrag->offset,
-					   copy, msg_iter) != copy) {
-			rc = -EFAULT;
+		rc = tls_device_copy_data(page_address(pfrag->page) +
+					  pfrag->offset, copy, msg_iter);
+		if (rc)
 			goto handle_error;
-		}
 		tls_append_frag(record, pfrag, copy);
 
 		size -= copy;
@@ -457,13 +491,24 @@ static int tls_push_data(struct sock *sk,
 
 		if (done || record->len >= max_open_record_len ||
 		    (record->num_frags >= MAX_SKB_FRAGS - 1)) {
+			rc = tls_device_record_close(sk, tls_ctx, record,
+						     pfrag, record_type);
+			if (rc) {
+				if (rc > 0) {
+					size += rc;
+				} else {
+					size = orig_size;
+					destroy_record(record);
+					ctx->open_record = NULL;
+					break;
+				}
+			}
+
 			rc = tls_push_record(sk,
 					     tls_ctx,
 					     ctx,
 					     record,
-					     pfrag,
-					     tls_push_record_flags,
-					     record_type);
+					     tls_push_record_flags);
 			if (rc < 0)
 				break;
 		}
@@ -538,12 +583,16 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 		/* if retransmit_hint is irrelevant start
 		 * from the beggining of the list
 		 */
-		info = list_first_entry(&context->records_list,
-					struct tls_record_info, list);
+		info = list_first_entry_or_null(&context->records_list,
+						struct tls_record_info, list);
+		if (!info)
+			return NULL;
 		record_sn = context->unacked_record_sn;
 	}
 
-	list_for_each_entry_from(info, &context->records_list, list) {
+	/* We just need the _rcu for the READ_ONCE() */
+	rcu_read_lock();
+	list_for_each_entry_from_rcu(info, &context->records_list, list) {
 		if (before(seq, info->end_seq)) {
 			if (!context->retransmit_hint ||
 			    after(info->end_seq,
@@ -552,12 +601,15 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				context->retransmit_hint = info;
 			}
 			*p_record_sn = record_sn;
-			return info;
+			goto exit_rcu_unlock;
 		}
 		record_sn++;
 	}
+	info = NULL;
 
-	return NULL;
+exit_rcu_unlock:
+	rcu_read_unlock();
+	return info;
 }
 EXPORT_SYMBOL(tls_get_record);