Commit cd1334f0 authored by Jack Steiner, committed by Linus Torvalds

gru: bug fixes for GRU exception handling

Bug fixes for GRU exception handling.  Additional fields from the CBR must
be returned to the user to allow the user to correctly diagnose GRU
exceptions.

Handle endcase in TFH TLB miss handling.  Verify that TFH actually
indicates a pending exception.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d6580a9f
...@@ -81,6 +81,8 @@ struct control_block_extended_exc_detail { ...@@ -81,6 +81,8 @@ struct control_block_extended_exc_detail {
int exopc; int exopc;
long exceptdet0; long exceptdet0;
int exceptdet1; int exceptdet1;
int cbrstate;
int cbrexecstatus;
}; };
/* /*
...@@ -107,7 +109,8 @@ struct gru_instruction_bits { ...@@ -107,7 +109,8 @@ struct gru_instruction_bits {
unsigned char reserved2: 2; unsigned char reserved2: 2;
unsigned char istatus: 2; unsigned char istatus: 2;
unsigned char isubstatus:4; unsigned char isubstatus:4;
unsigned char reserved3: 2; unsigned char reserved3: 1;
unsigned char tlb_fault_color: 1;
/* DW 1 */ /* DW 1 */
unsigned long idef4; /* 42 bits: TRi1, BufSize */ unsigned long idef4; /* 42 bits: TRi1, BufSize */
/* DW 2-6 */ /* DW 2-6 */
...@@ -253,6 +256,21 @@ struct gru_instruction { ...@@ -253,6 +256,21 @@ struct gru_instruction {
#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16) #define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16)
#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17) #define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17)
/*
 * CBE cbrexecstatus bits.
 *
 * Each flag is defined twice: CBR_EXS_<name>_BIT is the flag's bit
 * position (0 through 5), and CBR_EXS_<name> is the matching single-bit
 * mask built by shifting 1 left by that position.
 */
#define CBR_EXS_ABORT_OCC_BIT 0
#define CBR_EXS_INT_OCC_BIT 1
#define CBR_EXS_PENDING_BIT 2
#define CBR_EXS_QUEUED_BIT 3
#define CBR_EXS_TLBHW_BIT 4
#define CBR_EXS_EXCEPTION_BIT 5
/* Single-bit masks derived from the bit positions above */
#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT)
#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
/* /*
* Exceptions are retried for the following cases. If any OTHER bits are set * Exceptions are retried for the following cases. If any OTHER bits are set
* in ecause, the exception is not retryable. * in ecause, the exception is not retryable.
......
...@@ -334,6 +334,8 @@ static int gru_try_dropin(struct gru_thread_state *gts, ...@@ -334,6 +334,8 @@ static int gru_try_dropin(struct gru_thread_state *gts,
* Might be a hardware race OR a stupid user. Ignore FMM because FMM * Might be a hardware race OR a stupid user. Ignore FMM because FMM
* is a transient state. * is a transient state.
*/ */
if (tfh->status != TFHSTATUS_EXCEPTION)
goto failnoexception;
if (tfh->state == TFHSTATE_IDLE) if (tfh->state == TFHSTATE_IDLE)
goto failidle; goto failidle;
if (tfh->state == TFHSTATE_MISS_FMM && cb) if (tfh->state == TFHSTATE_MISS_FMM && cb)
...@@ -401,8 +403,17 @@ static int gru_try_dropin(struct gru_thread_state *gts, ...@@ -401,8 +403,17 @@ static int gru_try_dropin(struct gru_thread_state *gts,
gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
return 0; return 0;
failnoexception:
/* TFH status did not show exception pending */
gru_flush_cache(tfh);
if (cb)
gru_flush_cache(cb);
STAT(tlb_dropin_fail_no_exception);
gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", tfh, tfh->status, tfh->state);
return 0;
failidle: failidle:
/* TFH was idle - no miss pending */ /* TFH state was idle - no miss pending */
gru_flush_cache(tfh); gru_flush_cache(tfh);
if (cb) if (cb)
gru_flush_cache(cb); gru_flush_cache(cb);
...@@ -472,7 +483,8 @@ irqreturn_t gru_intr(int irq, void *dev_id) ...@@ -472,7 +483,8 @@ irqreturn_t gru_intr(int irq, void *dev_id)
* This is running in interrupt context. Trylock the mmap_sem. * This is running in interrupt context. Trylock the mmap_sem.
* If it fails, retry the fault in user context. * If it fails, retry the fault in user context.
*/ */
if (down_read_trylock(&gts->ts_mm->mmap_sem)) { if (!gts->ts_force_cch_reload &&
down_read_trylock(&gts->ts_mm->mmap_sem)) {
gru_try_dropin(gts, tfh, NULL); gru_try_dropin(gts, tfh, NULL);
up_read(&gts->ts_mm->mmap_sem); up_read(&gts->ts_mm->mmap_sem);
} else { } else {
...@@ -595,14 +607,19 @@ int gru_get_exception_detail(unsigned long arg) ...@@ -595,14 +607,19 @@ int gru_get_exception_detail(unsigned long arg)
excdet.ecause = cbe->ecause; excdet.ecause = cbe->ecause;
excdet.exceptdet0 = cbe->idef1upd; excdet.exceptdet0 = cbe->idef1upd;
excdet.exceptdet1 = cbe->idef3upd; excdet.exceptdet1 = cbe->idef3upd;
excdet.cbrstate = cbe->cbrstate;
excdet.cbrexecstatus = cbe->cbrexecstatus;
ret = 0; ret = 0;
} else { } else {
ret = -EAGAIN; ret = -EAGAIN;
} }
gru_unlock_gts(gts); gru_unlock_gts(gts);
gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb, gru_dbg(grudev,
excdet.ecause); "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
"exdet0 0x%lx, exdet1 0x%x\n",
excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
ret = -EFAULT; ret = -EFAULT;
return ret; return ret;
......
...@@ -251,15 +251,14 @@ struct gru_tlb_fault_handle { ...@@ -251,15 +251,14 @@ struct gru_tlb_fault_handle {
unsigned int fill1:9; unsigned int fill1:9;
unsigned int status:2; unsigned int status:2;
unsigned int fill2:1; unsigned int fill2:2;
unsigned int color:1;
unsigned int state:3; unsigned int state:3;
unsigned int fill3:1; unsigned int fill3:1;
unsigned int cause:7; /* DW 0 - high 32 */ unsigned int cause:7;
unsigned int fill4:1; unsigned int fill4:1;
unsigned int indexway:12; unsigned int indexway:12; /* DW 0 - high 32 */
unsigned int fill5:4; unsigned int fill5:4;
unsigned int ctxnum:4; unsigned int ctxnum:4;
...@@ -457,21 +456,7 @@ enum gru_cbr_state { ...@@ -457,21 +456,7 @@ enum gru_cbr_state {
CBRSTATE_BUSY_INTERRUPT, CBRSTATE_BUSY_INTERRUPT,
}; };
/* CBE cbrexecstatus bits */ /* CBE cbrexecstatus bits - defined in gru_instructions.h*/
#define CBR_EXS_ABORT_OCC_BIT 0
#define CBR_EXS_INT_OCC_BIT 1
#define CBR_EXS_PENDING_BIT 2
#define CBR_EXS_QUEUED_BIT 3
#define CBR_EXS_TLBHW_BIT 4
#define CBR_EXS_EXCEPTION_BIT 5
#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT)
#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
/* CBE ecause bits - defined in gru_instructions.h */ /* CBE ecause bits - defined in gru_instructions.h */
/* /*
......
...@@ -599,6 +599,9 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload) ...@@ -599,6 +599,9 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload)
cch->sizeavail[i] = gts->ts_sizeavail; cch->sizeavail[i] = gts->ts_sizeavail;
gts->ts_tlb_int_select = gru_cpu_fault_map_id(); gts->ts_tlb_int_select = gru_cpu_fault_map_id();
cch->tlb_int_select = gru_cpu_fault_map_id(); cch->tlb_int_select = gru_cpu_fault_map_id();
cch->tfm_fault_bit_enable =
(gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
|| gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
} else { } else {
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
cch->asid[i] = 0; cch->asid[i] = 0;
......
...@@ -84,6 +84,8 @@ static int statistics_show(struct seq_file *s, void *p) ...@@ -84,6 +84,8 @@ static int statistics_show(struct seq_file *s, void *p)
printstat(s, tlb_dropin_fail_range_active); printstat(s, tlb_dropin_fail_range_active);
printstat(s, tlb_dropin_fail_idle); printstat(s, tlb_dropin_fail_idle);
printstat(s, tlb_dropin_fail_fmm); printstat(s, tlb_dropin_fail_fmm);
printstat(s, tlb_dropin_fail_no_exception);
printstat(s, tlb_dropin_fail_no_exception_war);
printstat(s, mmu_invalidate_range); printstat(s, mmu_invalidate_range);
printstat(s, mmu_invalidate_page); printstat(s, mmu_invalidate_page);
printstat(s, mmu_clear_flush_young); printstat(s, mmu_clear_flush_young);
...@@ -158,8 +160,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf, ...@@ -158,8 +160,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
unsigned long val; unsigned long val;
char buf[80]; char buf[80];
if (copy_from_user if (strncpy_from_user(buf, userbuf, sizeof(buf) - 1) < 0)
(buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
return -EFAULT; return -EFAULT;
buf[count - 1] = '\0'; buf[count - 1] = '\0';
if (!strict_strtoul(buf, 10, &val)) if (!strict_strtoul(buf, 10, &val))
......
...@@ -207,6 +207,8 @@ struct gru_stats_s { ...@@ -207,6 +207,8 @@ struct gru_stats_s {
atomic_long_t tlb_dropin_fail_range_active; atomic_long_t tlb_dropin_fail_range_active;
atomic_long_t tlb_dropin_fail_idle; atomic_long_t tlb_dropin_fail_idle;
atomic_long_t tlb_dropin_fail_fmm; atomic_long_t tlb_dropin_fail_fmm;
atomic_long_t tlb_dropin_fail_no_exception;
atomic_long_t tlb_dropin_fail_no_exception_war;
atomic_long_t mmu_invalidate_range; atomic_long_t mmu_invalidate_range;
atomic_long_t mmu_invalidate_page; atomic_long_t mmu_invalidate_page;
atomic_long_t mmu_clear_flush_young; atomic_long_t mmu_clear_flush_young;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment