drbd: Move the CREATE_BARRIER flag from device to connection

This is necessary because the whole transfer log is per connection (tconn), not per device (mdev). The bug caused list corruption on the worker list: when a barrier was queued for sending in the context of one device, another device did not see the CREATE_BARRIER bit and queued the same object again, which corrupted the list. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>

drbd: Move the CREATE_BARRIER flag from device to connection
This is necessary because the whole transfer log is per connection (tconn), not per device (mdev). The bug caused list corruption on the worker list: when a barrier was queued for sending in the context of one device, another device did not see the CREATE_BARRIER bit and queued the same object again, which corrupted the list. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
6936fcb4 · Philipp Reisner · 36baf611 · 6936fcb4 · 6936fcb4 · 6936fcb4
Commit 6936fcb4 authored Nov 10, 2011 by Philipp Reisner
Showing with 14 additions and 18 deletions

drivers/block/drbd/drbd_int.h drivers/block/drbd/drbd_int.h +1 -1

drivers/block/drbd/drbd_main.c drivers/block/drbd/drbd_main.c +3 -8

drivers/block/drbd/drbd_req.c drivers/block/drbd/drbd_req.c +10 -9

No files found.
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -670,7 +670,6 @@ enum {

 /* flag bits per mdev */
 enum {
-	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
 	UNPLUG_QUEUED,		/* only relevant with kernel 2.4 */
 	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
 	MD_DIRTY,		/* current uuids and flags not yet on disk */
@@ -813,6 +812,7 @@ enum {
 	CONN_WD_ST_CHG_OKAY,
 	CONN_WD_ST_CHG_FAIL,
 	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
+	CREATE_BARRIER,		/* next P_DATA is preceded by a P_BARRIER */
 };

 struct drbd_tconn {			/* is a resource from the config file */

--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -320,7 +320,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
 	mdev = b->w.mdev;

 	nob = b->next;
-	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+	if (test_and_clear_bit(CREATE_BARRIER, &tconn->flags)) {
 		_tl_add_barrier(tconn, b);
 		if (nob)
 			tconn->oldest_tle = nob;
@@ -381,7 +381,7 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 				if (b->w.cb == NULL) {
 					b->w.cb = w_send_barrier;
 					inc_ap_pending(b->w.mdev);
-					set_bit(CREATE_BARRIER, &b->w.mdev->flags);
+					set_bit(CREATE_BARRIER, &tconn->flags);
 				}

 				drbd_queue_work(&tconn->data.work, &b->w);
@@ -448,10 +448,8 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 */
 void tl_clear(struct drbd_tconn *tconn)
 {
-	struct drbd_conf *mdev;
 	struct list_head *le, *tle;
 	struct drbd_request *r;
-	int vnr;

 	spin_lock_irq(&tconn->req_lock);

@@ -470,10 +468,7 @@ void tl_clear(struct drbd_tconn *tconn)
 	}

 	/* ensure bit indicating barrier is required is clear */
-	rcu_read_lock();
-	idr_for_each_entry(&tconn->volumes, mdev, vnr)
-		clear_bit(CREATE_BARRIER, &mdev->flags);
-	rcu_read_unlock();
+	clear_bit(CREATE_BARRIER, &tconn->flags);

 	spin_unlock_irq(&tconn->req_lock);
 }

--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -146,16 +146,17 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
 static void queue_barrier(struct drbd_conf *mdev)
 {
 	struct drbd_tl_epoch *b;
+	struct drbd_tconn *tconn = mdev->tconn;

 	/* We are within the req_lock. Once we queued the barrier for sending,
 	 * we set the CREATE_BARRIER bit. It is cleared as soon as a new
 	 * barrier/epoch object is added. This is the only place this bit is
 	 * set. It indicates that the barrier for this epoch is already queued,
 	 * and no new epoch has been created yet. */
-	if (test_bit(CREATE_BARRIER, &mdev->flags))
+	if (test_bit(CREATE_BARRIER, &tconn->flags))
 		return;

-	b = mdev->tconn->newest_tle;
+	b = tconn->newest_tle;
 	b->w.cb = w_send_barrier;
 	b->w.mdev = mdev;
 	/* inc_ap_pending done here, so we won't
@@ -163,8 +164,8 @@ static void queue_barrier(struct drbd_conf *mdev)
 	 * dec_ap_pending will be done in got_BarrierAck
 	 * or (on connection loss) in tl_clear.  */
 	inc_ap_pending(mdev);
-	drbd_queue_work(&mdev->tconn->data.work, &b->w);
-	set_bit(CREATE_BARRIER, &mdev->flags);
+	drbd_queue_work(&tconn->data.work, &b->w);
+	set_bit(CREATE_BARRIER, &tconn->flags);
 }

 static void _about_to_complete_local_write(struct drbd_conf *mdev,
@@ -479,7 +480,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,

 		/* see __drbd_make_request,
 		 * just after it grabs the req_lock */
-		D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
+		D_ASSERT(test_bit(CREATE_BARRIER, &mdev->tconn->flags) == 0);

 		req->epoch = mdev->tconn->newest_tle->br_number;

@@ -836,7 +837,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
 	 * if we lost that race, we retry.  */
 	if (rw == WRITE && (remote || send_oos) &&
 	    mdev->tconn->unused_spare_tle == NULL &&
-	    test_bit(CREATE_BARRIER, &mdev->flags)) {
+	    test_bit(CREATE_BARRIER, &mdev->tconn->flags)) {
 allocate_barrier:
 		b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO);
 		if (!b) {
@@ -893,7 +894,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
 	}
 	if (rw == WRITE && (remote || send_oos) &&
 	    mdev->tconn->unused_spare_tle == NULL &&
-	    test_bit(CREATE_BARRIER, &mdev->flags)) {
+	    test_bit(CREATE_BARRIER, &mdev->tconn->flags)) {
 		/* someone closed the current epoch
 		 * while we were grabbing the spinlock */
 		spin_unlock_irq(&mdev->tconn->req_lock);
@@ -915,12 +916,12 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
 	 * make sure that, if this is a write request and it triggered a
 	 * barrier packet, this request is queued within the same spinlock. */
 	if ((remote || send_oos) && mdev->tconn->unused_spare_tle &&
-	    test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+	    test_and_clear_bit(CREATE_BARRIER, &mdev->tconn->flags)) {
 		_tl_add_barrier(mdev->tconn, mdev->tconn->unused_spare_tle);
 		mdev->tconn->unused_spare_tle = NULL;
 	} else {
 		D_ASSERT(!(remote && rw == WRITE &&
-			   test_bit(CREATE_BARRIER, &mdev->flags)));
+			   test_bit(CREATE_BARRIER, &mdev->tconn->flags)));
 	}

 	/* NOTE