Commit 6c15d7af authored by Ira Snyder's avatar Ira Snyder Committed by Benjamin Herrenschmidt

carma-fpga: fix race between data dumping and DMA callback

When the system is under heavy load, we occasionally saw a problem where
the system would get a legitimate interrupt when they should be
disabled.

This was caused by the data_dma_cb() DMA callback unconditionally
re-enabling FPGA interrupts even when data dumping is disabled. When
data dumping was re-enabled, the irq handler would fire while a DMA was
in progress. The "BUG_ON(priv->inflight != NULL);" during the second
invocation of the DMA callback caused the system to crash.

To fix the issue, the priv->enabled boolean is moved under the
protection of the priv->lock spinlock. The DMA callback checks the
boolean to know whether to re-enable FPGA interrupts before it returns.

Now that it is fixed, the driver keeps FPGA interrupts disabled when it
expects that they are disabled, fixing the bug.
Signed-off-by: default avatarIra W. Snyder <iws@ovro.caltech.edu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent 75ff85a8
......@@ -560,6 +560,9 @@ static void data_enable_interrupts(struct fpga_device *priv)
/* flush the writes */
fpga_read_reg(priv, 0, MMAP_REG_STATUS);
fpga_read_reg(priv, 1, MMAP_REG_STATUS);
fpga_read_reg(priv, 2, MMAP_REG_STATUS);
fpga_read_reg(priv, 3, MMAP_REG_STATUS);
/* switch back to the external interrupt source */
iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL);
......@@ -591,7 +594,11 @@ static void data_dma_cb(void *data)
list_move_tail(&priv->inflight->entry, &priv->used);
priv->inflight = NULL;
/* clear the FPGA status and re-enable interrupts */
/*
* If data dumping is still enabled, then clear the FPGA
* status registers and re-enable FPGA interrupts
*/
if (priv->enabled)
data_enable_interrupts(priv);
spin_unlock_irqrestore(&priv->lock, flags);
......@@ -708,6 +715,15 @@ static irqreturn_t data_irq(int irq, void *dev_id)
spin_lock(&priv->lock);
/*
* This is an error case that should never happen.
*
* If this driver has a bug and manages to re-enable interrupts while
* a DMA is in progress, then we will hit this statement and should
* start paying attention immediately.
*/
BUG_ON(priv->inflight != NULL);
/* hide the interrupt by switching the IRQ driver to GPIO */
data_disable_interrupts(priv);
......@@ -762,11 +778,15 @@ static irqreturn_t data_irq(int irq, void *dev_id)
*/
static int data_device_enable(struct fpga_device *priv)
{
bool enabled;
u32 val;
int ret;
/* multiple enables are safe: they do nothing */
if (priv->enabled)
spin_lock_irq(&priv->lock);
enabled = priv->enabled;
spin_unlock_irq(&priv->lock);
if (enabled)
return 0;
/* check that the FPGAs are programmed */
......@@ -797,6 +817,9 @@ static int data_device_enable(struct fpga_device *priv)
goto out_error;
}
/* prevent the FPGAs from generating interrupts */
data_disable_interrupts(priv);
/* hookup the irq handler */
ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv);
if (ret) {
......@@ -804,11 +827,13 @@ static int data_device_enable(struct fpga_device *priv)
goto out_error;
}
/* switch to the external FPGA IRQ line */
data_enable_interrupts(priv);
/* success, we're enabled */
/* allow the DMA callback to re-enable FPGA interrupts */
spin_lock_irq(&priv->lock);
priv->enabled = true;
spin_unlock_irq(&priv->lock);
/* allow the FPGAs to generate interrupts */
data_enable_interrupts(priv);
return 0;
out_error:
......@@ -834,41 +859,40 @@ static int data_device_enable(struct fpga_device *priv)
*/
static int data_device_disable(struct fpga_device *priv)
{
int ret;
spin_lock_irq(&priv->lock);
/* allow multiple disable */
if (!priv->enabled)
if (!priv->enabled) {
spin_unlock_irq(&priv->lock);
return 0;
}
/* switch to the internal GPIO IRQ line */
/*
* Mark the device disabled
*
* This stops DMA callbacks from re-enabling interrupts
*/
priv->enabled = false;
/* prevent the FPGAs from generating interrupts */
data_disable_interrupts(priv);
/* wait until all ongoing DMA has finished */
while (priv->inflight != NULL) {
spin_unlock_irq(&priv->lock);
wait_event(priv->wait, priv->inflight == NULL);
spin_lock_irq(&priv->lock);
}
spin_unlock_irq(&priv->lock);
/* unhook the irq handler */
free_irq(priv->irq, priv);
/*
* wait for all outstanding DMA to complete
*
* Device interrupts are disabled, therefore another buffer cannot
* be marked inflight.
*/
ret = wait_event_interruptible(priv->wait, priv->inflight == NULL);
if (ret)
return ret;
/* free the correlation table */
sg_free_table(&priv->corl_table);
priv->corl_nents = 0;
/*
* We are taking the spinlock not to protect priv->enabled, but instead
* to make sure that there are no readers in the process of altering
* the free or used lists while we are setting this flag.
*/
spin_lock_irq(&priv->lock);
priv->enabled = false;
spin_unlock_irq(&priv->lock);
/* free all buffers: the free and used lists are not being changed */
data_free_buffers(priv);
return 0;
......@@ -896,15 +920,6 @@ static unsigned int list_num_entries(struct list_head *list)
static int data_debug_show(struct seq_file *f, void *offset)
{
struct fpga_device *priv = f->private;
int ret;
/*
* Lock the mutex first, so that we get an accurate value for enable
* Lock the spinlock next, to get accurate list counts
*/
ret = mutex_lock_interruptible(&priv->mutex);
if (ret)
return ret;
spin_lock_irq(&priv->lock);
......@@ -917,7 +932,6 @@ static int data_debug_show(struct seq_file *f, void *offset)
seq_printf(f, "num_dropped: %d\n", priv->num_dropped);
spin_unlock_irq(&priv->lock);
mutex_unlock(&priv->mutex);
return 0;
}
......@@ -970,7 +984,13 @@ static ssize_t data_en_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct fpga_device *priv = dev_get_drvdata(dev);
return snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled);
int ret;
spin_lock_irq(&priv->lock);
ret = snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled);
spin_unlock_irq(&priv->lock);
return ret;
}
static ssize_t data_en_set(struct device *dev, struct device_attribute *attr,
......@@ -986,6 +1006,7 @@ static ssize_t data_en_set(struct device *dev, struct device_attribute *attr,
return -EINVAL;
}
/* protect against concurrent enable/disable */
ret = mutex_lock_interruptible(&priv->mutex);
if (ret)
return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment