Commit 7387943f authored by Jason A. Donenfeld, committed by David S. Miller

wireguard: queueing: use saner cpu selection wrapping

Using `% nr_cpumask_bits` is slow and complicated, and not totally
robust toward dynamic changes to CPU topologies. Rather than storing the
next CPU in the round-robin, just store the last one, and also return
that value. This simplifies the loop drastically into a much more common
pattern.

Fixes: e7096c13 ("net: WireGuard secure network tunnel")
Cc: stable@vger.kernel.org
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Tested-by: Manuel Leiner <manuel.leiner@gmx.de>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a27ac539
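
For illustration only (not part of the commit): a minimal user-space C sketch of the wrapping pattern described above. It remembers the last CPU that was handed out, advances to the next online CPU, and falls back to the first online CPU once the search runs past the end of the mask. NR_CPUS, the fake online bitmap, and the next_online()/pick_cpu() helpers are invented for this sketch; the kernel code uses cpumask_next(), cpumask_first() and cpu_online_mask instead.

    #include <stdint.h>
    #include <stdio.h>

    #define NR_CPUS 64  /* sketch-only stand-in for the kernel's nr_cpu_ids */

    /* Return the first "online" CPU strictly after prev, or NR_CPUS if none. */
    static int next_online(uint64_t online_mask, int prev)
    {
            for (int cpu = prev + 1; cpu < NR_CPUS; cpu++)
                    if (online_mask & (UINT64_C(1) << cpu))
                            return cpu;
            return NR_CPUS;
    }

    /* Mirrors the shape of the new wg_cpumask_next_online(): *last_cpu starts at -1. */
    static int pick_cpu(uint64_t online_mask, int *last_cpu)
    {
            int cpu = next_online(online_mask, *last_cpu);
            if (cpu >= NR_CPUS)                          /* ran past the end: wrap around */
                    cpu = next_online(online_mask, -1);  /* first online CPU */
            *last_cpu = cpu;
            return cpu;
    }

    int main(void)
    {
            /* Pretend only CPUs 1, 3 and 6 are online. */
            uint64_t online = (UINT64_C(1) << 1) | (UINT64_C(1) << 3) | (UINT64_C(1) << 6);
            int last_cpu = -1;  /* same initial value the patch sets in wg_packet_queue_init() */

            for (int i = 0; i < 8; i++)
                    printf("%d ", pick_cpu(online, &last_cpu));
            printf("\n");
            return 0;
    }

Run as-is, the loop prints "1 3 6 1 3 6 1 3": the selection round-robins over the online CPUs and wraps around without any modulo arithmetic.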
--- a/drivers/net/wireguard/queueing.c
+++ b/drivers/net/wireguard/queueing.c
@@ -28,6 +28,7 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
 	int ret;
 
 	memset(queue, 0, sizeof(*queue));
+	queue->last_cpu = -1;
 	ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
 	if (ret)
 		return ret;
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -117,20 +117,17 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
 	return cpu;
 }
 
-/* This function is racy, in the sense that next is unlocked, so it could return
- * the same CPU twice. A race-free version of this would be to instead store an
- * atomic sequence number, do an increment-and-return, and then iterate through
- * every possible CPU until we get to that index -- choose_cpu. However that's
- * a bit slower, and it doesn't seem like this potential race actually
- * introduces any performance loss, so we live with it.
+/* This function is racy, in the sense that it's called while last_cpu is
+ * unlocked, so it could return the same CPU twice. Adding locking or using
+ * atomic sequence numbers is slower though, and the consequences of racing are
+ * harmless, so live with it.
  */
-static inline int wg_cpumask_next_online(int *next)
+static inline int wg_cpumask_next_online(int *last_cpu)
 {
-	int cpu = *next;
-
-	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
-		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
-	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+	int cpu = cpumask_next(*last_cpu, cpu_online_mask);
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(cpu_online_mask);
+	*last_cpu = cpu;
 	return cpu;
 }
@@ -159,7 +156,7 @@ static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
 
 static inline int wg_queue_enqueue_per_device_and_peer(
 	struct crypt_queue *device_queue, struct prev_queue *peer_queue,
-	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+	struct sk_buff *skb, struct workqueue_struct *wq)
 {
 	int cpu;
 
@@ -173,7 +170,7 @@ static inline int wg_queue_enqueue_per_device_and_peer(
 	/* Then we queue it up in the device queue, which consumes the
 	 * packet as soon as it can.
 	 */
-	cpu = wg_cpumask_next_online(next_cpu);
+	cpu = wg_cpumask_next_online(&device_queue->last_cpu);
 	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
 		return -EPIPE;
 	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -524,7 +524,7 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
 		goto err;
 
 	ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb,
-						   wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu);
+						   wg->packet_crypt_wq);
 	if (unlikely(ret == -EPIPE))
 		wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD);
 	if (likely(!ret || ret == -EPIPE)) {
--- a/drivers/net/wireguard/send.c
+++ b/drivers/net/wireguard/send.c
@@ -318,7 +318,7 @@ static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first)
 		goto err;
 
 	ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first,
-						   wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
+						   wg->packet_crypt_wq);
 	if (unlikely(ret == -EPIPE))
 		wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD);
 err: