Commit 970fbde1, authored by Lars Ellenberg, committed by Philipp Reisner

drbd: flush drbd work queue before invalidate/invalidate remote

If you do back-to-back wait-sync/invalidate on a Primary in a tight loop
while under application IO load, you could trigger a race:
  kernel: block drbd6: FIXME going to queue 'set_n_write from StartingSync'
    but 'write from resync_finished' still pending?

Fix this by changing the order of the drbd_queue_work() and
the wake_up() in dec_ap_bio(), and adding an additional
drbd_flush_workqueue() before requesting the full sync.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 6f1a6563
...@@ -2251,15 +2251,17 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) ...@@ -2251,15 +2251,17 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt); int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
D_ASSERT(ap_bio >= 0); D_ASSERT(ap_bio >= 0);
if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w);
}
/* this currently does wake_up for every dec_ap_bio! /* this currently does wake_up for every dec_ap_bio!
* maybe rather introduce some type of hysteresis? * maybe rather introduce some type of hysteresis?
* e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */ * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
if (ap_bio < mxb) if (ap_bio < mxb)
wake_up(&mdev->misc_wait); wake_up(&mdev->misc_wait);
if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
drbd_queue_work(&mdev->tconn->sender_work, &mdev->bm_io_work.w);
}
} }
static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
......
...@@ -2408,9 +2408,11 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) ...@@ -2408,9 +2408,11 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
mdev = adm_ctx.mdev; mdev = adm_ctx.mdev;
/* If there is still bitmap IO pending, probably because of a previous /* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync. */ * resync just being finished, wait for it before requesting a new resync.
* Also wait for its after_state_ch(). */
drbd_suspend_io(mdev); drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
drbd_flush_workqueue(mdev);
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
...@@ -2475,9 +2477,11 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) ...@@ -2475,9 +2477,11 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
mdev = adm_ctx.mdev; mdev = adm_ctx.mdev;
/* If there is still bitmap IO pending, probably because of a previous /* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync. */ * resync just being finished, wait for it before requesting a new resync.
* Also wait for its after_state_ch(). */
drbd_suspend_io(mdev); drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
drbd_flush_workqueue(mdev);
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
if (retcode < SS_SUCCESS) { if (retcode < SS_SUCCESS) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment