Commit 6c15d7af authored by Ira Snyder's avatar Ira Snyder Committed by Benjamin Herrenschmidt

carma-fpga: fix race between data dumping and DMA callback

When the system is under heavy load, we occasionally saw a problem where
the system would get a legitimate interrupt when they should be
disabled.

This was caused by the data_dma_cb() DMA callback unconditionally
re-enabling FPGA interrupts even when data dumping is disabled. When
data dumping was re-enabled, the irq handler would fire while a DMA was
in progress. The "BUG_ON(priv->inflight != NULL);" during the second
invocation of the DMA callback caused the system to crash.

To fix the issue, the priv->enabled boolean is moved under the
protection of the priv->lock spinlock. The DMA callback checks the
boolean to know whether to re-enable FPGA interrupts before it returns.

Now that it is fixed, the driver keeps FPGA interrupts disabled when it
expects that they are disabled, fixing the bug.
Signed-off-by: default avatarIra W. Snyder <iws@ovro.caltech.edu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent 75ff85a8
...@@ -560,6 +560,9 @@ static void data_enable_interrupts(struct fpga_device *priv) ...@@ -560,6 +560,9 @@ static void data_enable_interrupts(struct fpga_device *priv)
/* flush the writes */ /* flush the writes */
fpga_read_reg(priv, 0, MMAP_REG_STATUS); fpga_read_reg(priv, 0, MMAP_REG_STATUS);
fpga_read_reg(priv, 1, MMAP_REG_STATUS);
fpga_read_reg(priv, 2, MMAP_REG_STATUS);
fpga_read_reg(priv, 3, MMAP_REG_STATUS);
/* switch back to the external interrupt source */ /* switch back to the external interrupt source */
iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL); iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL);
...@@ -591,8 +594,12 @@ static void data_dma_cb(void *data) ...@@ -591,8 +594,12 @@ static void data_dma_cb(void *data)
list_move_tail(&priv->inflight->entry, &priv->used); list_move_tail(&priv->inflight->entry, &priv->used);
priv->inflight = NULL; priv->inflight = NULL;
/* clear the FPGA status and re-enable interrupts */ /*
data_enable_interrupts(priv); * If data dumping is still enabled, then clear the FPGA
* status registers and re-enable FPGA interrupts
*/
if (priv->enabled)
data_enable_interrupts(priv);
spin_unlock_irqrestore(&priv->lock, flags); spin_unlock_irqrestore(&priv->lock, flags);
...@@ -708,6 +715,15 @@ static irqreturn_t data_irq(int irq, void *dev_id) ...@@ -708,6 +715,15 @@ static irqreturn_t data_irq(int irq, void *dev_id)
spin_lock(&priv->lock); spin_lock(&priv->lock);
/*
* This is an error case that should never happen.
*
* If this driver has a bug and manages to re-enable interrupts while
* a DMA is in progress, then we will hit this statement and should
* start paying attention immediately.
*/
BUG_ON(priv->inflight != NULL);
/* hide the interrupt by switching the IRQ driver to GPIO */ /* hide the interrupt by switching the IRQ driver to GPIO */
data_disable_interrupts(priv); data_disable_interrupts(priv);
...@@ -762,11 +778,15 @@ static irqreturn_t data_irq(int irq, void *dev_id) ...@@ -762,11 +778,15 @@ static irqreturn_t data_irq(int irq, void *dev_id)
*/ */
static int data_device_enable(struct fpga_device *priv) static int data_device_enable(struct fpga_device *priv)
{ {
bool enabled;
u32 val; u32 val;
int ret; int ret;
/* multiple enables are safe: they do nothing */ /* multiple enables are safe: they do nothing */
if (priv->enabled) spin_lock_irq(&priv->lock);
enabled = priv->enabled;
spin_unlock_irq(&priv->lock);
if (enabled)
return 0; return 0;
/* check that the FPGAs are programmed */ /* check that the FPGAs are programmed */
...@@ -797,6 +817,9 @@ static int data_device_enable(struct fpga_device *priv) ...@@ -797,6 +817,9 @@ static int data_device_enable(struct fpga_device *priv)
goto out_error; goto out_error;
} }
/* prevent the FPGAs from generating interrupts */
data_disable_interrupts(priv);
/* hookup the irq handler */ /* hookup the irq handler */
ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv); ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv);
if (ret) { if (ret) {
...@@ -804,11 +827,13 @@ static int data_device_enable(struct fpga_device *priv) ...@@ -804,11 +827,13 @@ static int data_device_enable(struct fpga_device *priv)
goto out_error; goto out_error;
} }
/* switch to the external FPGA IRQ line */ /* allow the DMA callback to re-enable FPGA interrupts */
data_enable_interrupts(priv); spin_lock_irq(&priv->lock);
/* success, we're enabled */
priv->enabled = true; priv->enabled = true;
spin_unlock_irq(&priv->lock);
/* allow the FPGAs to generate interrupts */
data_enable_interrupts(priv);
return 0; return 0;
out_error: out_error:
...@@ -834,41 +859,40 @@ static int data_device_enable(struct fpga_device *priv) ...@@ -834,41 +859,40 @@ static int data_device_enable(struct fpga_device *priv)
*/ */
static int data_device_disable(struct fpga_device *priv) static int data_device_disable(struct fpga_device *priv)
{ {
int ret; spin_lock_irq(&priv->lock);
/* allow multiple disable */ /* allow multiple disable */
if (!priv->enabled) if (!priv->enabled) {
spin_unlock_irq(&priv->lock);
return 0; return 0;
}
/*
* Mark the device disabled
*
* This stops DMA callbacks from re-enabling interrupts
*/
priv->enabled = false;
/* switch to the internal GPIO IRQ line */ /* prevent the FPGAs from generating interrupts */
data_disable_interrupts(priv); data_disable_interrupts(priv);
/* wait until all ongoing DMA has finished */
while (priv->inflight != NULL) {
spin_unlock_irq(&priv->lock);
wait_event(priv->wait, priv->inflight == NULL);
spin_lock_irq(&priv->lock);
}
spin_unlock_irq(&priv->lock);
/* unhook the irq handler */ /* unhook the irq handler */
free_irq(priv->irq, priv); free_irq(priv->irq, priv);
/*
* wait for all outstanding DMA to complete
*
* Device interrupts are disabled, therefore another buffer cannot
* be marked inflight.
*/
ret = wait_event_interruptible(priv->wait, priv->inflight == NULL);
if (ret)
return ret;
/* free the correlation table */ /* free the correlation table */
sg_free_table(&priv->corl_table); sg_free_table(&priv->corl_table);
priv->corl_nents = 0; priv->corl_nents = 0;
/*
* We are taking the spinlock not to protect priv->enabled, but instead
* to make sure that there are no readers in the process of altering
* the free or used lists while we are setting this flag.
*/
spin_lock_irq(&priv->lock);
priv->enabled = false;
spin_unlock_irq(&priv->lock);
/* free all buffers: the free and used lists are not being changed */ /* free all buffers: the free and used lists are not being changed */
data_free_buffers(priv); data_free_buffers(priv);
return 0; return 0;
...@@ -896,15 +920,6 @@ static unsigned int list_num_entries(struct list_head *list) ...@@ -896,15 +920,6 @@ static unsigned int list_num_entries(struct list_head *list)
static int data_debug_show(struct seq_file *f, void *offset) static int data_debug_show(struct seq_file *f, void *offset)
{ {
struct fpga_device *priv = f->private; struct fpga_device *priv = f->private;
int ret;
/*
* Lock the mutex first, so that we get an accurate value for enable
* Lock the spinlock next, to get accurate list counts
*/
ret = mutex_lock_interruptible(&priv->mutex);
if (ret)
return ret;
spin_lock_irq(&priv->lock); spin_lock_irq(&priv->lock);
...@@ -917,7 +932,6 @@ static int data_debug_show(struct seq_file *f, void *offset) ...@@ -917,7 +932,6 @@ static int data_debug_show(struct seq_file *f, void *offset)
seq_printf(f, "num_dropped: %d\n", priv->num_dropped); seq_printf(f, "num_dropped: %d\n", priv->num_dropped);
spin_unlock_irq(&priv->lock); spin_unlock_irq(&priv->lock);
mutex_unlock(&priv->mutex);
return 0; return 0;
} }
...@@ -970,7 +984,13 @@ static ssize_t data_en_show(struct device *dev, struct device_attribute *attr, ...@@ -970,7 +984,13 @@ static ssize_t data_en_show(struct device *dev, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct fpga_device *priv = dev_get_drvdata(dev); struct fpga_device *priv = dev_get_drvdata(dev);
return snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled); int ret;
spin_lock_irq(&priv->lock);
ret = snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled);
spin_unlock_irq(&priv->lock);
return ret;
} }
static ssize_t data_en_set(struct device *dev, struct device_attribute *attr, static ssize_t data_en_set(struct device *dev, struct device_attribute *attr,
...@@ -986,6 +1006,7 @@ static ssize_t data_en_set(struct device *dev, struct device_attribute *attr, ...@@ -986,6 +1006,7 @@ static ssize_t data_en_set(struct device *dev, struct device_attribute *attr,
return -EINVAL; return -EINVAL;
} }
/* protect against concurrent enable/disable */
ret = mutex_lock_interruptible(&priv->mutex); ret = mutex_lock_interruptible(&priv->mutex);
if (ret) if (ret)
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment