Commit 19fc9ddd authored by David Carrillo-Cisneros's avatar David Carrillo-Cisneros Committed by Ingo Molnar

perf/x86/intel: Fix MSR_LAST_BRANCH_FROM_x bug when no TSX

Intel's SDM states that bits 61:62 in MSR_LAST_BRANCH_FROM_x are the
TSX flags for formats with LBR_TSX flags (i.e. LBR_FORMAT_EIP_EFLAGS2).

However, when the CPU has TSX support deactivated, bits 61:62 actually
behave as follows:

  - For wrmsr(), bits 61:62 are considered part of the sign extension.
  - When capturing branches, the LBR hw will always clear bits 61:62.
    regardless of the sign extension.

Therefore, if:

  1) LBR has TSX format.
  2) CPU has no TSX support enabled.

... then any value passed to wrmsr() must be sign extended to 63 bits
and any value from rdmsr() must be converted to have a sign extension
of 61 bits, ignoring the values at TSX flags.

This bug was masked by the work-around to the Intel's CPU bug:
BJ94. "LBR May Contain Incorrect Information When Using FREEZE_LBRS_ON_PMI"
in Document Number: 324643-037US.

The aforementioned work-around uses hw flags to filter out all kernel
branches, limiting LBR callstack to user level execution only.

Since user addresses are not sign extended, they do not trigger the wrmsr()
bug in MSR_LAST_BRANCH_FROM_x when saved/restored at context switch.

To verify the hw bug:

  $ perf record -b -e cycles sleep 1
  $ rdmsr -p 0 0x680
  0x1fffffffb0b9b0cc
  $ wrmsr -p 0 0x680 0x1fffffffb0b9b0cc
  write(): Input/output error

The quirk for LBR_FROM_ MSRs is required before calls to wrmsrl() and
after rdmsrl().

This patch introduces it for wrmsrl()'s done for testing LBR support.

Future patch in series adds the quirk for context switch, that would
be required if LBR callstack is to be enabled for ring 0.
Signed-off-by: default avatarDavid Carrillo-Cisneros <davidcc@google.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarStephane Eranian <eranian@google.com>
Reviewed-by: default avatarAndi Kleen <ak@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1466533874-52003-3-git-send-email-davidcc@google.comSigned-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent f09509b9
...@@ -3361,6 +3361,13 @@ static void intel_snb_check_microcode(void) ...@@ -3361,6 +3361,13 @@ static void intel_snb_check_microcode(void)
} }
} }
static bool is_lbr_from(unsigned long msr)
{
unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
}
/* /*
* Under certain circumstances, access certain MSR may cause #GP. * Under certain circumstances, access certain MSR may cause #GP.
* The function tests if the input MSR can be safely accessed. * The function tests if the input MSR can be safely accessed.
...@@ -3381,13 +3388,24 @@ static bool check_msr(unsigned long msr, u64 mask) ...@@ -3381,13 +3388,24 @@ static bool check_msr(unsigned long msr, u64 mask)
* Only change the bits which can be updated by wrmsrl. * Only change the bits which can be updated by wrmsrl.
*/ */
val_tmp = val_old ^ mask; val_tmp = val_old ^ mask;
if (is_lbr_from(msr))
val_tmp = lbr_from_signext_quirk_wr(val_tmp);
if (wrmsrl_safe(msr, val_tmp) || if (wrmsrl_safe(msr, val_tmp) ||
rdmsrl_safe(msr, &val_new)) rdmsrl_safe(msr, &val_new))
return false; return false;
/*
* Quirk only affects validation in wrmsr(), so wrmsrl()'s value
* should equal rdmsrl()'s even with the quirk.
*/
if (val_new != val_tmp) if (val_new != val_tmp)
return false; return false;
if (is_lbr_from(msr))
val_old = lbr_from_signext_quirk_wr(val_old);
/* Here it's sure that the MSR can be safely accessed. /* Here it's sure that the MSR can be safely accessed.
* Restore the old value and return. * Restore the old value and return.
*/ */
......
...@@ -81,6 +81,8 @@ static enum { ...@@ -81,6 +81,8 @@ static enum {
#define LBR_FROM_FLAG_IN_TX (1ULL << 62) #define LBR_FROM_FLAG_IN_TX (1ULL << 62)
#define LBR_FROM_FLAG_ABORT (1ULL << 61) #define LBR_FROM_FLAG_ABORT (1ULL << 61)
#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59))
/* /*
* x86control flow change classification * x86control flow change classification
* x86control flow changes include branches, interrupts, traps, faults * x86control flow changes include branches, interrupts, traps, faults
...@@ -235,6 +237,53 @@ enum { ...@@ -235,6 +237,53 @@ enum {
LBR_VALID, LBR_VALID,
}; };
/*
* For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
* MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
* TSX is not supported they have no consistent behavior:
*
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
* part of the sign extension.
*
* Therefore, if:
*
* 1) LBR has TSX format
* 2) CPU has no TSX support enabled
*
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
* value from rdmsr() must be converted to have a 61 bits sign extension,
* ignoring the TSX flags.
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
}
DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
{
if (static_branch_unlikely(&lbr_from_quirk_key)) {
/*
* Sign extend into bits 61:62 while preserving bit 63.
*
* Quirk is enabled when TSX is disabled. Therefore TSX bits
* in val are always OFF and must be changed to be sign
* extension bits. Since bits 59:60 are guaranteed to be
* part of the sign extension bits, we can just copy them
* to 61:62.
*/
val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
}
return val;
}
static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{ {
int i; int i;
...@@ -1007,6 +1056,9 @@ void intel_pmu_lbr_init_hsw(void) ...@@ -1007,6 +1056,9 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = hsw_lbr_sel_map; x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
if (lbr_from_signext_quirk_needed())
static_branch_enable(&lbr_from_quirk_key);
} }
/* skylake */ /* skylake */
......
...@@ -902,6 +902,8 @@ void intel_ds_init(void); ...@@ -902,6 +902,8 @@ void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
u64 lbr_from_signext_quirk_wr(u64 val);
void intel_pmu_lbr_reset(void); void intel_pmu_lbr_reset(void);
void intel_pmu_lbr_enable(struct perf_event *event); void intel_pmu_lbr_enable(struct perf_event *event);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment