drm: Fix race that can lockup the kernel

The i915_vblank_swap() function schedules an automatic buffer swap upon receipt of the vertical sync interrupt. Such an operation is lengthy so it can't be allowed to happen in normal interrupt context, thus the DRM implements this by scheduling the work in a kernel softirq-scheduled tasklet. In order for the buffer swap to work safely, the DRM's central lock must be taken, via a call to drm_lock_take() located in drivers/char/drm/drm_irq.c within the function drm_locked_tasklet_func(). The lock-taking logic uses a non-interrupt-blocking spinlock to implement the manipulations needed to take the lock. This semantic would be safe if all attempts to use the spinlock only happen from process context. However this buffer swap happens from softirq context which is really a form of interrupt context. Thus we have an unsafe situation, in that drm_locked_tasklet_func() can block on a spinlock already taken by a thread in process context which will never get scheduled again because of the blocked softirq tasklet. This wedges the kernel hard. To trigger this bug, run a dual-head cloned mode configuration which uses the i915 drm, then execute an opengl application which synchronizes buffer swaps against the vertical sync interrupt. In my testing, a lockup always results after running anywhere from 5 minutes to an hour and a half. I believe dual-head is needed to really trigger the problem because then the vertical sync interrupt handling is no longer predictable (due to being interrupt-sourced from two different heads running at different speeds). This raises the probability of the tasklet trying to run while the userspace DRI is doing things to the GPU (and manipulating the DRM lock). The fix is to change the relevant spinlock semantics to be the interrupt-blocking form. After this change I am no longer able to trigger the lockup; the longest test run so far was 20 hours (test stopped after that point). Note: I have examined the places where this spinlock is being employed; all are reasonably short bounded sequences and should be suitable for interrupts being blocked without impacting overall kernel interrupt response latency. Signed-off-by: Mike Isely <isely@pobox.com> Signed-off-by: Dave Airlie <airlied@redhat.com>

drm: Fix race that can lockup the kernel
The i915_vblank_swap() function schedules an automatic buffer swap upon receipt of the vertical sync interrupt. Such an operation is lengthy so it can't be allowed to happen in normal interrupt context, thus the DRM implements this by scheduling the work in a kernel softirq-scheduled tasklet. In order for the buffer swap to work safely, the DRM's central lock must be taken, via a call to drm_lock_take() located in drivers/char/drm/drm_irq.c within the function drm_locked_tasklet_func(). The lock-taking logic uses a non-interrupt-blocking spinlock to implement the manipulations needed to take the lock. This semantic would be safe if all attempts to use the spinlock only happen from process context. However this buffer swap happens from softirq context which is really a form of interrupt context. Thus we have an unsafe situation, in that drm_locked_tasklet_func() can block on a spinlock already taken by a thread in process context which will never get scheduled again because of the blocked softirq tasklet. This wedges the kernel hard. To trigger this bug, run a dual-head cloned mode configuration which uses the i915 drm, then execute an opengl application which synchronizes buffer swaps against the vertical sync interrupt. In my testing, a lockup always results after running anywhere from 5 minutes to an hour and a half. I believe dual-head is needed to really trigger the problem because then the vertical sync interrupt handling is no longer predictable (due to being interrupt-sourced from two different heads running at different speeds). This raises the probability of the tasklet trying to run while the userspace DRI is doing things to the GPU (and manipulating the DRM lock). The fix is to change the relevant spinlock semantics to be the interrupt-blocking form. After this change I am no longer able to trigger the lockup; the longest test run so far was 20 hours (test stopped after that point). Note: I have examined the places where this spinlock is being employed; all are reasonably short bounded sequences and should be suitable for interrupts being blocked without impacting overall kernel interrupt response latency. Signed-off-by: Mike Isely <isely@pobox.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
9df5808c · Mike Isely · Dave Airlie · a978b30a · 9df5808c · 9df5808c
Commit 9df5808c authored Mar 13, 2008 by Mike Isely Committed by Dave Airlie Mar 17, 2008
Hide whitespace changes
Inline Side-by-side

Showing with 25 additions and 17 deletions

drivers/char/drm/drm_fops.c drivers/char/drm/drm_fops.c +5 -2

drivers/char/drm/drm_lock.c drivers/char/drm/drm_lock.c +20 -15

No files found.
--- a/drivers/char/drm/drm_fops.c
+++ b/drivers/char/drm/drm_fops.c
@@ -326,6 +326,7 @@ int drm_release(struct inode *inode, struct file *filp)
 	struct drm_file *file_priv = filp->private_data;
 	struct drm_device *dev = file_priv->head->dev;
 	int retcode = 0;
+	unsigned long irqflags;

 	lock_kernel();

@@ -357,9 +358,11 @@ int drm_release(struct inode *inode, struct file *filp)
 			 */

 			do{
-				spin_lock(&dev->lock.spinlock);
+				spin_lock_irqsave(&dev->lock.spinlock,
+						  irqflags);
 				locked = dev->lock.idle_has_lock;
-				spin_unlock(&dev->lock.spinlock);
+				spin_unlock_irqrestore(&dev->lock.spinlock,
+						       irqflags);
 				if (locked)
 					break;
 				schedule();

--- a/drivers/char/drm/drm_lock.c
+++ b/drivers/char/drm/drm_lock.c
@@ -53,6 +53,7 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 	DECLARE_WAITQUEUE(entry, current);
 	struct drm_lock *lock = data;
 	int ret = 0;
+	unsigned long irqflags;

 	++file_priv->lock_count;

@@ -71,9 +72,9 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 			return -EINVAL;

 	add_wait_queue(&dev->lock.lock_queue, &entry);
-	spin_lock(&dev->lock.spinlock);
+	spin_lock_irqsave(&dev->lock.spinlock, irqflags);
 	dev->lock.user_waiters++;
-	spin_unlock(&dev->lock.spinlock);
+	spin_unlock_irqrestore(&dev->lock.spinlock, irqflags);
 	for (;;) {
 		__set_current_state(TASK_INTERRUPTIBLE);
 		if (!dev->lock.hw_lock) {
@@ -95,9 +96,9 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 			break;
 		}
 	}
-	spin_lock(&dev->lock.spinlock);
+	spin_lock_irqsave(&dev->lock.spinlock, irqflags);
 	dev->lock.user_waiters--;
-	spin_unlock(&dev->lock.spinlock);
+	spin_unlock_irqrestore(&dev->lock.spinlock, irqflags);
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&dev->lock.lock_queue, &entry);

@@ -198,8 +199,9 @@ int drm_lock_take(struct drm_lock_data *lock_data,
 {
 	unsigned int old, new, prev;
 	volatile unsigned int *lock = &lock_data->hw_lock->lock;
+	unsigned long irqflags;

-	spin_lock(&lock_data->spinlock);
+	spin_lock_irqsave(&lock_data->spinlock, irqflags);
 	do {
 		old = *lock;
 		if (old & _DRM_LOCK_HELD)
@@ -211,7 +213,7 @@ int drm_lock_take(struct drm_lock_data *lock_data,
 		}
 		prev = cmpxchg(lock, old, new);
 	} while (prev != old);
-	spin_unlock(&lock_data->spinlock);
+	spin_unlock_irqrestore(&lock_data->spinlock, irqflags);

 	if (_DRM_LOCKING_CONTEXT(old) == context) {
 		if (old & _DRM_LOCK_HELD) {
@@ -272,15 +274,16 @@ int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context)
 {
 	unsigned int old, new, prev;
 	volatile unsigned int *lock = &lock_data->hw_lock->lock;
+	unsigned long irqflags;

-	spin_lock(&lock_data->spinlock);
+	spin_lock_irqsave(&lock_data->spinlock, irqflags);
 	if (lock_data->kernel_waiters != 0) {
 		drm_lock_transfer(lock_data, 0);
 		lock_data->idle_has_lock = 1;
-		spin_unlock(&lock_data->spinlock);
+		spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 		return 1;
 	}
-	spin_unlock(&lock_data->spinlock);
+	spin_unlock_irqrestore(&lock_data->spinlock, irqflags);

 	do {
 		old = *lock;
@@ -344,19 +347,20 @@ static int drm_notifier(void *priv)
 void drm_idlelock_take(struct drm_lock_data *lock_data)
 {
 	int ret = 0;
+	unsigned long irqflags;

-	spin_lock(&lock_data->spinlock);
+	spin_lock_irqsave(&lock_data->spinlock, irqflags);
 	lock_data->kernel_waiters++;
 	if (!lock_data->idle_has_lock) {

-		spin_unlock(&lock_data->spinlock);
+		spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 		ret = drm_lock_take(lock_data, DRM_KERNEL_CONTEXT);
-		spin_lock(&lock_data->spinlock);
+		spin_lock_irqsave(&lock_data->spinlock, irqflags);

 		if (ret == 1)
 			lock_data->idle_has_lock = 1;
 	}
-	spin_unlock(&lock_data->spinlock);
+	spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 }
 EXPORT_SYMBOL(drm_idlelock_take);

@@ -364,8 +368,9 @@ void drm_idlelock_release(struct drm_lock_data *lock_data)
 {
 	unsigned int old, prev;
 	volatile unsigned int *lock = &lock_data->hw_lock->lock;
+	unsigned long irqflags;

-	spin_lock(&lock_data->spinlock);
+	spin_lock_irqsave(&lock_data->spinlock, irqflags);
 	if (--lock_data->kernel_waiters == 0) {
 		if (lock_data->idle_has_lock) {
 			do {
@@ -376,7 +381,7 @@ void drm_idlelock_release(struct drm_lock_data *lock_data)
 			lock_data->idle_has_lock = 0;
 		}
 	}
-	spin_unlock(&lock_data->spinlock);
+	spin_unlock_irqrestore(&lock_data->spinlock, irqflags);
 }
 EXPORT_SYMBOL(drm_idlelock_release);