Commit 55cb5f43 authored by Linus Torvalds

Merge tag 'trace-v6.7-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fix from Steven Rostedt:
 "While working on the ring buffer, I found one more bug with the
  timestamp code, and the fix for this removed the need for the final
  64-bit cmpxchg!

  The ring buffer events hold a "delta" from the previous event. If it
  is determined that the delta can not be calculated, it falls back to
  adding an absolute timestamp value. The way to know if the delta can
  be used is via two stored timestamps in the per-cpu buffer meta data:

   before_stamp and write_stamp

  The before_stamp is written by every event before it tries to allocate
  its space on the ring buffer. The write_stamp is written after it
  allocates its space and knows that nothing came in after it read the
  previous before_stamp and write_stamp and the two matched.

  A previous fix dd939425 ("ring-buffer: Do not try to put back
  write_stamp") removed putting back the write_stamp to match the
  before_stamp so that the next event could use the delta, but races
  were found where the two would match, but not be for the previous
  event.

  It was determined to allow the event reservation to not have a valid
  write_stamp when it is finished, and this fixed a lot of races.

  The last use of the 64-bit timestamp cmpxchg depended on the
  write_stamp being valid after an interruption. But this is no longer
  the case, as if an event is interrupted by a softirq that writes an
  event, and that event gets interrupted by a hardirq or NMI and that
  writes an event, then the softirq could finish its reservation without
  a valid write_stamp.

  In the slow path of the event reservation, a delta can still be used
  if the write_stamp is valid. Instead of using a cmpxchg against the
  write stamp, the before_stamp needs to be read again to validate the
  write_stamp. The cmpxchg is not needed.

  This updates the slowpath to validate the write_stamp by comparing it
  to the before_stamp and removes all rb_time_cmpxchg() as there are no
  more users of that function.

  The removal of the 32-bit updates of rb_time_t will be done in the
  next merge window"

* tag 'trace-v6.7-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  ring-buffer: Fix slowpath of interrupted event
parents 9c749e61 b803d7c6
......@@ -700,48 +700,6 @@ rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set)
return local_try_cmpxchg(l, &expect, set);
}
/*
 * rb_time_cmpxchg - compare-and-exchange of a split rb_time_t timestamp.
 * This is the variant that precedes the "#else" for 64 bits, i.e. the one
 * where the timestamp is kept in the separate cnt/msb/top/bottom words.
 *
 * @t:      timestamp to update
 * @expect: value the timestamp must currently hold
 * @set:    value to store if the timestamp matches @expect
 *
 * Returns true only if the current value was read consistently, equalled
 * @expect, and all four per-word cmpxchg steps succeeded. Returns false
 * otherwise. No rollback is done here: if a later per-word cmpxchg fails,
 * the words already exchanged keep their new contents.
 */
static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
{
unsigned long cnt, top, bottom, msb;
unsigned long cnt2, top2, bottom2, msb2;
u64 val;
/* Any interruptions in this function should cause a failure */
/* Snapshot the counter first; it is the expected value for the cnt cmpxchg below */
cnt = local_read(&t->cnt);
/* The cmpxchg always fails if it interrupted an update */
if (!__rb_time_read(t, &val, &cnt2))
return false;
/* Current value is not the one the caller expected */
if (val != expect)
return false;
/* The low two bits of the snapshot must match the count reported by the read */
if ((cnt & 3) != cnt2)
return false;
/* cnt2 is reused from here on as the new counter value */
cnt2 = cnt + 1;
/*
 * Build the words expected to be in place now: the parts of the value
 * just read, each combined with the old counter via rb_time_val_cnt()
 * (helper not shown here; presumably it embeds the count in the word).
 */
rb_time_split(val, &top, &bottom, &msb);
msb = rb_time_val_cnt(msb, cnt);
top = rb_time_val_cnt(top, cnt);
bottom = rb_time_val_cnt(bottom, cnt);
/*
 * Build the replacement words from @set.
 * NOTE(review): msb2 is combined with the old cnt while top2 and bottom2
 * use the new cnt2 -- confirm this asymmetry is intended.
 */
rb_time_split(set, &top2, &bottom2, &msb2);
msb2 = rb_time_val_cnt(msb2, cnt);
top2 = rb_time_val_cnt(top2, cnt2);
bottom2 = rb_time_val_cnt(bottom2, cnt2);
/* Exchange the counter first, then msb, top and bottom; stop at the first failure */
if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2))
return false;
if (!rb_time_read_cmpxchg(&t->msb, msb, msb2))
return false;
if (!rb_time_read_cmpxchg(&t->top, top, top2))
return false;
if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2))
return false;
return true;
}
#else /* 64 bits */
/* local64_t always succeeds */
......@@ -755,11 +713,6 @@ static void rb_time_set(rb_time_t *t, u64 val)
{
local64_set(&t->time, val);
}
/*
 * 64-bit variant: the timestamp lives in a single local64_t (t->time), so
 * the compare-and-exchange is one local64_try_cmpxchg(). Returns true when
 * t->time held @expect and was replaced by @set.
 */
static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
{
	bool swapped;

	/* @expect is a by-value copy, so the helper may overwrite it freely */
	swapped = local64_try_cmpxchg(&t->time, &expect, set);

	return swapped;
}
#endif
/*
......@@ -3610,20 +3563,36 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
} else {
u64 ts;
/* SLOW PATH - Interrupted between A and C */
a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
/* Was interrupted before here, write_stamp must be valid */
/* Save the old before_stamp */
a_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
RB_WARN_ON(cpu_buffer, !a_ok);
/*
* Read a new timestamp and update the before_stamp to make
* the next event after this one force using an absolute
* timestamp. This is in case an interrupt were to come in
* between E and F.
*/
ts = rb_time_stamp(cpu_buffer->buffer);
rb_time_set(&cpu_buffer->before_stamp, ts);
barrier();
/*E*/ a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
/* Was interrupted before here, write_stamp must be valid */
RB_WARN_ON(cpu_buffer, !a_ok);
barrier();
/*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
info->after < ts &&
rb_time_cmpxchg(&cpu_buffer->write_stamp,
info->after, ts)) {
/* Nothing came after this event between C and E */
/*F*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
info->after == info->before && info->after < ts) {
/*
* Nothing came after this event between C and F, it is
* safe to use info->after for the delta as it
* matched info->before and is still valid.
*/
info->delta = ts - info->after;
} else {
/*
* Interrupted between C and E:
* Interrupted between C and F:
* Lost the previous events time stamp. Just set the
* delta to zero, and this will be the same time as
* the event this event interrupted. And the events that
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment