Commit 67bf04a5, authored by Jack Steiner, committed by Linus Torvalds

gru: fix prefetch and speculation bugs

Fix several bugs related to prefetch, ordering & speculation:

	- GRU cch_allocate() instruction causes cacheable memory
	  to be created. Add barriers to prevent speculation
	  from prefetching data before it exists.
	- Add memory barriers before cache-flush instructions to ensure
	  that previously stored data is included in the line flushed to memory.
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e006043a
...@@ -325,6 +325,7 @@ static inline void gru_flush_cache(void *p) ...@@ -325,6 +325,7 @@ static inline void gru_flush_cache(void *p)
static inline void gru_start_instruction(struct gru_instruction *ins, int op32) static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
{ {
gru_ordered_store_int(ins, op32); gru_ordered_store_int(ins, op32);
mb();
gru_flush_cache(ins); gru_flush_cache(ins);
} }
......
...@@ -333,6 +333,7 @@ static int gru_try_dropin(struct gru_thread_state *gts, ...@@ -333,6 +333,7 @@ static int gru_try_dropin(struct gru_thread_state *gts,
*/ */
if (tfh->status != TFHSTATUS_EXCEPTION) { if (tfh->status != TFHSTATUS_EXCEPTION) {
gru_flush_cache(tfh); gru_flush_cache(tfh);
sync_core();
if (tfh->status != TFHSTATUS_EXCEPTION) if (tfh->status != TFHSTATUS_EXCEPTION)
goto failnoexception; goto failnoexception;
STAT(tfh_stale_on_fault); STAT(tfh_stale_on_fault);
...@@ -599,6 +600,7 @@ int gru_get_exception_detail(unsigned long arg) ...@@ -599,6 +600,7 @@ int gru_get_exception_detail(unsigned long arg)
cbrnum = thread_cbr_number(gts, ucbnum); cbrnum = thread_cbr_number(gts, ucbnum);
cbe = get_cbe_by_index(gts->ts_gru, cbrnum); cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
gru_flush_cache(cbe); /* CBE not coherent */ gru_flush_cache(cbe); /* CBE not coherent */
sync_core(); /* make sure we are have current data */
excdet.opc = cbe->opccpy; excdet.opc = cbe->opccpy;
excdet.exopc = cbe->exopccpy; excdet.exopc = cbe->exopccpy;
excdet.ecause = cbe->ecause; excdet.ecause = cbe->ecause;
......
...@@ -91,9 +91,18 @@ static int wait_instruction_complete(void *h, enum mcs_op opc) ...@@ -91,9 +91,18 @@ static int wait_instruction_complete(void *h, enum mcs_op opc)
int cch_allocate(struct gru_context_configuration_handle *cch) int cch_allocate(struct gru_context_configuration_handle *cch)
{ {
int ret;
cch->opc = CCHOP_ALLOCATE; cch->opc = CCHOP_ALLOCATE;
start_instruction(cch); start_instruction(cch);
return wait_instruction_complete(cch, cchop_allocate); ret = wait_instruction_complete(cch, cchop_allocate);
/*
* Stop speculation into the GSEG being mapped by the previous ALLOCATE.
* The GSEG memory does not exist until the ALLOCATE completes.
*/
sync_core();
return ret;
} }
int cch_start(struct gru_context_configuration_handle *cch) int cch_start(struct gru_context_configuration_handle *cch)
...@@ -112,9 +121,18 @@ int cch_interrupt(struct gru_context_configuration_handle *cch) ...@@ -112,9 +121,18 @@ int cch_interrupt(struct gru_context_configuration_handle *cch)
int cch_deallocate(struct gru_context_configuration_handle *cch) int cch_deallocate(struct gru_context_configuration_handle *cch)
{ {
int ret;
cch->opc = CCHOP_DEALLOCATE; cch->opc = CCHOP_DEALLOCATE;
start_instruction(cch); start_instruction(cch);
return wait_instruction_complete(cch, cchop_deallocate); ret = wait_instruction_complete(cch, cchop_deallocate);
/*
* Stop speculation into the GSEG being unmapped by the previous
* DEALLOCATE.
*/
sync_core();
return ret;
} }
int cch_interrupt_sync(struct gru_context_configuration_handle int cch_interrupt_sync(struct gru_context_configuration_handle
......
...@@ -395,6 +395,7 @@ int gru_get_cb_exception_detail(void *cb, ...@@ -395,6 +395,7 @@ int gru_get_cb_exception_detail(void *cb,
cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb)); cbrnum = thread_cbr_number(bs->bs_kgts, get_cb_number(cb));
cbe = get_cbe(GRUBASE(cb), cbrnum); cbe = get_cbe(GRUBASE(cb), cbrnum);
gru_flush_cache(cbe); /* CBE not coherent */ gru_flush_cache(cbe); /* CBE not coherent */
sync_core();
excdet->opc = cbe->opccpy; excdet->opc = cbe->opccpy;
excdet->exopc = cbe->exopccpy; excdet->exopc = cbe->exopccpy;
excdet->ecause = cbe->ecause; excdet->ecause = cbe->ecause;
...@@ -461,9 +462,10 @@ int gru_check_status_proc(void *cb) ...@@ -461,9 +462,10 @@ int gru_check_status_proc(void *cb)
int ret; int ret;
ret = gen->istatus; ret = gen->istatus;
if (ret != CBS_EXCEPTION) if (ret == CBS_EXCEPTION)
return ret; ret = gru_retry_exception(cb);
return gru_retry_exception(cb); rmb();
return ret;
} }
...@@ -475,7 +477,7 @@ int gru_wait_proc(void *cb) ...@@ -475,7 +477,7 @@ int gru_wait_proc(void *cb)
ret = gru_wait_idle_or_exception(gen); ret = gru_wait_idle_or_exception(gen);
if (ret == CBS_EXCEPTION) if (ret == CBS_EXCEPTION)
ret = gru_retry_exception(cb); ret = gru_retry_exception(cb);
rmb();
return ret; return ret;
} }
......
...@@ -499,6 +499,9 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum, ...@@ -499,6 +499,9 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum,
memset(cbe + i * GRU_HANDLE_STRIDE, 0, memset(cbe + i * GRU_HANDLE_STRIDE, 0,
GRU_CACHE_LINE_BYTES); GRU_CACHE_LINE_BYTES);
} }
/* Flush CBE to hide race in context restart */
mb();
gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
cb += GRU_HANDLE_STRIDE; cb += GRU_HANDLE_STRIDE;
} }
...@@ -519,6 +522,12 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum, ...@@ -519,6 +522,12 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
cb = gseg + GRU_CB_BASE; cb = gseg + GRU_CB_BASE;
cbe = grubase + GRU_CBE_BASE; cbe = grubase + GRU_CBE_BASE;
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
/* CBEs may not be coherent. Flush them from cache */
for_each_cbr_in_allocation_map(i, &cbrmap, scr)
gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
mb(); /* Let the CL flush complete */
gru_prefetch_context(gseg, cb, cbe, cbrmap, length); gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
for_each_cbr_in_allocation_map(i, &cbrmap, scr) { for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment