Commit bfbe5bab authored by David S. Miller

Merge branch 'ptr_ring-fixes'

Michael S. Tsirkin says:

====================
ptr_ring fixes

This fixes a bunch of issues around ptr_ring use in net core.
One of these: "tap: fix use-after-free" is also needed on net,
but can't be backported cleanly.

I will post a net patch separately.

Lightly tested - Jason, could you pls confirm this
addresses the security issue you saw with ptr_ring?
Testing reports would be appreciated too.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Jason Wang <jasowang@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
parents 7ece54a6 491847f3
...
@@ -330,9 +330,6 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 	if (!q)
 		return RX_HANDLER_PASS;
 
-	if (__ptr_ring_full(&q->ring))
-		goto drop;
-
 	skb_push(skb, ETH_HLEN);
 
 	/* Apply the forward feature mask so that we perform segmentation
...
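The check removed above ran without producer_lock, which the reworked comment on __ptr_ring_full() in the next file now explicitly rules out. Dropping it is safe because a genuinely full ring still drops the frame: ptr_ring_produce() fails under producer_lock when no slot is free, and tap drops on that failure. Roughly (a sketch of the existing pattern elsewhere in this function, not a new change):

	/* ptr_ring_produce() takes producer_lock and returns -ENOSPC when
	 * no free slot is available, so a full ring still drops the frame.
	 */
	if (ptr_ring_produce(&q->ring, skb))
		goto drop;
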
...
@@ -45,9 +45,10 @@ struct ptr_ring {
 };
 
 /* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). If ring is ever resized, callers must hold
- * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold
- * producer_lock, the next call to __ptr_ring_produce may fail.
+ * for example cpu_relax().
+ *
+ * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
+ * see e.g. ptr_ring_full.
  */
 static inline bool __ptr_ring_full(struct ptr_ring *r)
 {
@@ -113,7 +114,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 	/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
 	smp_wmb();
 
-	r->queue[r->producer++] = ptr;
+	WRITE_ONCE(r->queue[r->producer++], ptr);
 	if (unlikely(r->producer >= r->size))
 		r->producer = 0;
 	return 0;
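The WRITE_ONCE() here closes a store-tearing hole: the pointer that publishes an entry must reach the queue as a single store, and the smp_wmb() just above it orders the entry's contents before that store, pairing with READ_ONCE()/smp_read_barrier_depends() on the consume side. Below is a standalone sketch of the same publish/consume pairing, using C11 atomics as an analogy for the kernel primitives (names are illustrative, not from the patch):

/* Standalone analogy (assumption: C11 atomics stand in for the kernel's
 * smp_wmb()+WRITE_ONCE() on the producer side and
 * READ_ONCE()+smp_read_barrier_depends() on the consumer side;
 * "slot" plays the role of one r->queue[] entry, NULL meaning "empty").
 */
#include <stdatomic.h>
#include <stdio.h>

struct item { int payload; };

static _Atomic(struct item *) slot;

static void produce(struct item *it, int value)
{
	it->payload = value;				/* fill the entry first */
	atomic_store_explicit(&slot, it,		/* then publish it:     */
			      memory_order_release);	/* wmb + untorn store   */
}

static int consume(void)
{
	struct item *it = atomic_load_explicit(&slot,	/* untorn load +      */
			      memory_order_consume);	/* dependency order   */
	return it ? it->payload : -1;
}

int main(void)
{
	static struct item it;

	produce(&it, 42);
	printf("%d\n", consume());	/* prints 42 */
	return 0;
}
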
@@ -169,32 +170,36 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
 	return ret;
 }
 
-/* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). Callers must take consumer_lock
- * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
- * If ring is never resized, and if the pointer is merely
- * tested, there's no need to take the lock - see e.g. __ptr_ring_empty.
- * However, if called outside the lock, and if some other CPU
- * consumes ring entries at the same time, the value returned
- * is not guaranteed to be correct.
- * In this case - to avoid incorrectly detecting the ring
- * as empty - the CPU consuming the ring entries is responsible
- * for either consuming all ring entries until the ring is empty,
- * or synchronizing with some other CPU and causing it to
- * execute __ptr_ring_peek and/or consume the ring enteries
- * after the synchronization point.
- */
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
 	if (likely(r->size))
-		return r->queue[r->consumer_head];
+		return READ_ONCE(r->queue[r->consumer_head]);
 	return NULL;
 }
 
-/* See __ptr_ring_peek above for locking rules. */
+/*
+ * Test ring empty status without taking any locks.
+ *
+ * NB: This is only safe to call if ring is never resized.
+ *
+ * However, if some other CPU consumes ring entries at the same time, the value
+ * returned is not guaranteed to be correct.
+ *
+ * In this case - to avoid incorrectly detecting the ring
+ * as empty - the CPU consuming the ring entries is responsible
+ * for either consuming all ring entries until the ring is empty,
+ * or synchronizing with some other CPU and causing it to
+ * re-test __ptr_ring_empty and/or consume the ring enteries
+ * after the synchronization point.
+ *
+ * Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax().
+ */
 static inline bool __ptr_ring_empty(struct ptr_ring *r)
 {
-	return !__ptr_ring_peek(r);
+	if (likely(r->size))
+		return !r->queue[READ_ONCE(r->consumer_head)];
+	return true;
 }
 
 static inline bool ptr_ring_empty(struct ptr_ring *r)
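The documentation consolidated here spells out the contract for the lockless empty test: no consumer_lock is needed as long as the ring is never resized and the caller tolerates a possibly stale answer. A hedged usage sketch against this API, assuming a single consumer (the helper name is hypothetical, not part of this series):

/* Hypothetical helper: busy-wait for an entry on a single-consumer ring
 * that is never resized, then consume it under consumer_lock.
 */
static void *wait_and_consume(struct ptr_ring *r)
{
	while (__ptr_ring_empty(r))	/* lockless test of queue[consumer_head] */
		cpu_relax();		/* compiler barrier between re-tests     */
	return ptr_ring_consume(r);	/* takes consumer_lock to dereference    */
}
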
@@ -248,22 +253,28 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 	/* Fundamentally, what we want to do is update consumer
 	 * index and zero out the entry so producer can reuse it.
 	 * Doing it naively at each consume would be as simple as:
-	 * r->queue[r->consumer++] = NULL;
-	 * if (unlikely(r->consumer >= r->size))
-	 *	r->consumer = 0;
+	 * consumer = r->consumer;
+	 * r->queue[consumer++] = NULL;
+	 * if (unlikely(consumer >= r->size))
+	 *	consumer = 0;
+	 * r->consumer = consumer;
 	 * but that is suboptimal when the ring is full as producer is writing
 	 * out new entries in the same cache line. Defer these updates until a
 	 * batch of entries has been consumed.
 	 */
-	int head = r->consumer_head++;
+	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
+	 * to work correctly.
+	 */
+	int consumer_head = r->consumer_head;
+	int head = consumer_head++;
 
 	/* Once we have processed enough entries invalidate them in
 	 * the ring all at once so producer can reuse their space in the ring.
 	 * We also do this when we reach end of the ring - not mandatory
 	 * but helps keep the implementation simple.
 	 */
-	if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
-		     r->consumer_head >= r->size)) {
+	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
+		     consumer_head >= r->size)) {
 		/* Zero out entries in the reverse order: this way we touch the
 		 * cache line that producer might currently be reading the last;
 		 * producer won't make progress and touch other cache lines
@@ -271,12 +282,14 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 		 */
 		while (likely(head >= r->consumer_tail))
 			r->queue[head--] = NULL;
-		r->consumer_tail = r->consumer_head;
+		r->consumer_tail = consumer_head;
 	}
-	if (unlikely(r->consumer_head >= r->size)) {
-		r->consumer_head = 0;
+	if (unlikely(consumer_head >= r->size)) {
+		consumer_head = 0;
 		r->consumer_tail = 0;
 	}
+	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
+	WRITE_ONCE(r->consumer_head, consumer_head);
 }
 
 static inline void *__ptr_ring_consume(struct ptr_ring *r)
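The discard path now advances a local copy of the index and publishes it exactly once via WRITE_ONCE(), after any batched invalidation and after the wrap to zero, so a concurrent lockless __ptr_ring_empty() only ever observes an in-range consumer_head (which is also what allows the size + 1 over-allocation to be reverted in the hunk below). Condensed, the ordering the consumer now guarantees is roughly (a sketch, not a literal excerpt):

	int consumer_head = r->consumer_head + 1;	/* private working copy       */
	/* ... NULL out a batch of consumed slots, advance consumer_tail ... */
	if (consumer_head >= r->size)
		consumer_head = 0;			/* wrap before anyone sees it */
	/* single, untorn publication; pairs with READ_ONCE in __ptr_ring_empty */
	WRITE_ONCE(r->consumer_head, consumer_head);
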
@@ -453,12 +466,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
 {
-	/* Allocate an extra dummy element at end of ring to avoid consumer head
-	 * or produce head access past the end of the array. Possible when
-	 * producer/consumer operations and __ptr_ring_peek operations run in
-	 * parallel.
-	 */
-	return kcalloc(size + 1, sizeof(void *), gfp);
+	return kcalloc(size, sizeof(void *), gfp);
 }
 
 static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
@@ -532,7 +540,9 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
 			goto done;
 		}
 		r->queue[head] = batch[--n];
-		r->consumer_tail = r->consumer_head = head;
+		r->consumer_tail = head;
+		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
+		WRITE_ONCE(r->consumer_head, head);
 	}
 
 done:
...
@@ -69,7 +69,7 @@ static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb
  */
 static inline bool __skb_array_empty(struct skb_array *a)
 {
-	return !__ptr_ring_peek(&a->ring);
+	return __ptr_ring_empty(&a->ring);
 }
 
 static inline struct sk_buff *__skb_array_peek(struct skb_array *a)
...
@@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr)
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
-#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0)
 
 #define min(x, y) ({ \
 	typeof(x) _min1 = (x); \
...
+#define check_copy_size(A, B, C) (1)
...
@@ -111,7 +111,7 @@ static inline void busy_wait(void)
 }
 
 #if defined(__x86_64__) || defined(__i386__)
-#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
 #else
 /*
  * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
@@ -134,4 +134,61 @@ static inline void busy_wait(void)
 	barrier(); \
 } while (0)
 
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+#define smp_wmb() barrier()
+#else
+#define smp_wmb() smp_release()
+#endif
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() smp_acquire()
+#else
+#define smp_read_barrier_depends() do {} while(0)
+#endif
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+	switch (size) { \
+	case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \
+	case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \
+	case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \
+	case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \
+	default: \
+		barrier(); \
+		__builtin_memcpy((void *)res, (const void *)p, size); \
+		barrier(); \
+	} \
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
+	case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
+	case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
+	case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)p, (const void *)res, size);
+		barrier();
+	}
+}
+
+#define READ_ONCE(x) \
+({ \
+	union { typeof(x) __val; char __c[1]; } __u; \
+	__read_once_size(&(x), __u.__c, sizeof(x)); \
+	smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+	__u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+	union { typeof(x) __val; char __c[1]; } __u = \
+		{ .__val = (typeof(x)) (val) }; \
+	__write_once_size(&(x), __u.__c, sizeof(x)); \
+	__u.__val; \
+})
+
 #endif
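These userspace copies of READ_ONCE()/WRITE_ONCE() (and of the smp_wmb()/smp_read_barrier_depends() they pair with) are what lets the ringtest harness keep building the ptr_ring.h changes above. A hedged fragment showing the intended pairing with these macros (the names queue, head, tail and process are illustrative, not from the harness):

	/* Producer: make the entry's contents visible before publishing it. */
	entry->len = len;
	smp_wmb();
	WRITE_ONCE(queue[head], entry);

	/* Consumer: READ_ONCE() already issues smp_read_barrier_depends(),
	 * so a non-NULL pointer read here also orders the entry->len read.
	 */
	struct entry *e = READ_ONCE(queue[tail]);
	if (e)
		process(e->len);
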
...
@@ -187,7 +187,7 @@ bool enable_kick()
 bool avail_empty()
 {
-	return !__ptr_ring_peek(&array);
+	return __ptr_ring_empty(&array);
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
...