Commit 29b0a825 authored by Heiko Carstens's avatar Heiko Carstens Committed by Martin Schwidefsky

s390/etr,stp: fix possible deadlock on machine check

The first level machine check handler for ETR and STP machine checks may
call queue_work() while in NMI context. This may deadlock, e.g. if the
machine check happened while the interrupted context held a lock that
queue_work() will also acquire.
Therefore split ETR and STP machine check handling into first and second
level handling. The second level handling then issues the queue_work()
call in process context, which avoids the potential deadlock.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent 7cc8944e
...@@ -211,8 +211,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func) ...@@ -211,8 +211,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func)
#define ETR_PTFF_SGS 0x43 /* set gross steering rate */ #define ETR_PTFF_SGS 0x43 /* set gross steering rate */
/* Functions needed by the machine check handler */ /* Functions needed by the machine check handler */
void etr_switch_to_local(void); int etr_switch_to_local(void);
void etr_sync_check(void); int etr_sync_check(void);
void etr_queue_work(void);
/* notifier for syncs */ /* notifier for syncs */
extern struct atomic_notifier_head s390_epoch_delta_notifier; extern struct atomic_notifier_head s390_epoch_delta_notifier;
...@@ -253,7 +254,8 @@ struct stp_sstpi { ...@@ -253,7 +254,8 @@ struct stp_sstpi {
} __attribute__ ((packed)); } __attribute__ ((packed));
/* Functions needed by the machine check handler */ /* Functions needed by the machine check handler */
void stp_sync_check(void); int stp_sync_check(void);
void stp_island_check(void); int stp_island_check(void);
void stp_queue_work(void);
#endif /* __S390_ETR_H */ #endif /* __S390_ETR_H */
...@@ -28,6 +28,8 @@ struct mcck_struct { ...@@ -28,6 +28,8 @@ struct mcck_struct {
int kill_task; int kill_task;
int channel_report; int channel_report;
int warning; int warning;
unsigned int etr_queue : 1;
unsigned int stp_queue : 1;
unsigned long long mcck_code; unsigned long long mcck_code;
}; };
...@@ -81,6 +83,10 @@ void s390_handle_mcck(void) ...@@ -81,6 +83,10 @@ void s390_handle_mcck(void)
if (xchg(&mchchk_wng_posted, 1) == 0) if (xchg(&mchchk_wng_posted, 1) == 0)
kill_cad_pid(SIGPWR, 1); kill_cad_pid(SIGPWR, 1);
} }
if (mcck.etr_queue)
etr_queue_work();
if (mcck.stp_queue)
stp_queue_work();
if (mcck.kill_task) { if (mcck.kill_task) {
local_irq_enable(); local_irq_enable();
printk(KERN_EMERG "mcck: Terminating task because of machine " printk(KERN_EMERG "mcck: Terminating task because of machine "
...@@ -323,13 +329,15 @@ void notrace s390_do_machine_check(struct pt_regs *regs) ...@@ -323,13 +329,15 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (mci->ed && mci->ec) { if (mci->ed && mci->ec) {
/* External damage */ /* External damage */
if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
etr_sync_check(); mcck->etr_queue |= etr_sync_check();
if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
etr_switch_to_local(); mcck->etr_queue |= etr_switch_to_local();
if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
stp_sync_check(); mcck->stp_queue |= stp_sync_check();
if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
stp_island_check(); mcck->stp_queue |= stp_island_check();
if (mcck->etr_queue || mcck->stp_queue)
set_cpu_flag(CIF_MCCK_PENDING);
} }
if (mci->se) if (mci->se)
/* Storage error uncorrected */ /* Storage error uncorrected */
......
...@@ -542,16 +542,17 @@ arch_initcall(etr_init); ...@@ -542,16 +542,17 @@ arch_initcall(etr_init);
* Switch to local machine check. This is called when the last usable * Switch to local machine check. This is called when the last usable
* ETR port goes inactive. After switch to local the clock is not in sync. * ETR port goes inactive. After switch to local the clock is not in sync.
*/ */
void etr_switch_to_local(void) int etr_switch_to_local(void)
{ {
if (!etr_eacr.sl) if (!etr_eacr.sl)
return; return 0;
disable_sync_clock(NULL); disable_sync_clock(NULL);
if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
etr_eacr.es = etr_eacr.sl = 0; etr_eacr.es = etr_eacr.sl = 0;
etr_setr(&etr_eacr); etr_setr(&etr_eacr);
queue_work(time_sync_wq, &etr_work); return 1;
} }
return 0;
} }
/* /*
...@@ -560,16 +561,22 @@ void etr_switch_to_local(void) ...@@ -560,16 +561,22 @@ void etr_switch_to_local(void)
* After a ETR sync check the clock is not in sync. The machine check * After a ETR sync check the clock is not in sync. The machine check
* is broadcasted to all cpus at the same time. * is broadcasted to all cpus at the same time.
*/ */
void etr_sync_check(void) int etr_sync_check(void)
{ {
if (!etr_eacr.es) if (!etr_eacr.es)
return; return 0;
disable_sync_clock(NULL); disable_sync_clock(NULL);
if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
etr_eacr.es = 0; etr_eacr.es = 0;
etr_setr(&etr_eacr); etr_setr(&etr_eacr);
queue_work(time_sync_wq, &etr_work); return 1;
} }
return 0;
}
/*
 * Second level ETR machine check handling: runs in process context
 * (from s390_handle_mcck), where calling queue_work() is safe.
 */
void etr_queue_work(void)
{
	queue_work(time_sync_wq, &etr_work);
}
/* /*
...@@ -1504,10 +1511,10 @@ static void stp_timing_alert(struct stp_irq_parm *intparm) ...@@ -1504,10 +1511,10 @@ static void stp_timing_alert(struct stp_irq_parm *intparm)
* After a STP sync check the clock is not in sync. The machine check * After a STP sync check the clock is not in sync. The machine check
* is broadcasted to all cpus at the same time. * is broadcasted to all cpus at the same time.
*/ */
void stp_sync_check(void) int stp_sync_check(void)
{ {
disable_sync_clock(NULL); disable_sync_clock(NULL);
queue_work(time_sync_wq, &stp_work); return 1;
} }
/* /*
...@@ -1516,12 +1523,16 @@ void stp_sync_check(void) ...@@ -1516,12 +1523,16 @@ void stp_sync_check(void)
* have matching CTN ids and have a valid stratum-1 configuration * have matching CTN ids and have a valid stratum-1 configuration
* but the configurations do not match. * but the configurations do not match.
*/ */
void stp_island_check(void) int stp_island_check(void)
{ {
disable_sync_clock(NULL); disable_sync_clock(NULL);
queue_work(time_sync_wq, &stp_work); return 1;
} }
/*
 * Second level STP machine check handling: runs in process context
 * (from s390_handle_mcck), where calling queue_work() is safe.
 */
void stp_queue_work(void)
{
	queue_work(time_sync_wq, &stp_work);
}
static int stp_sync_clock(void *data) static int stp_sync_clock(void *data)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment