path: root/drivers/block/drbd/drbd_state.c
diff options
authorLars Ellenberg <lars.ellenberg@linbit.com>2012-07-30 09:10:41 +0200
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-11-08 16:58:40 +0100
commita324896b173e569fb831c5caa04ccd02ec0bc9ca (patch)
treefedb4c82e66c304c6ced91a9e83538af735ddb45 /drivers/block/drbd/drbd_state.c
parent8a943170711b7a4d63528ea8eb6a41cc91e79309 (diff)
drbd: do not reset rs_pending_cnt too early
Fix asserts like block drbd0: in got_BlockAck:4634: rs_pending_cnt = -35 < 0 ! We reset the resync lru cache and related information (rs_pending_cnt), once we successfully finished a resync or online verify, or if the replication connection is lost. We also need to reset it if a resync or online verify is aborted because a lower level disk failed. In that case the replication link is still established, and we may still have packets queued in the network buffers which want to touch rs_pending_cnt. We do not have any synchronization mechanism to know for sure when all such pending resync related packets have been drained. To avoid this counter to go negative (and violate the ASSERT that it will always be >= 0), just do not reset it when we lose a disk. It is good enough to make sure it is re-initialized before the next resync can start: reset it when we re-attach a disk. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_state.c')
1 files changed, 7 insertions, 4 deletions
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index c9ec7d37632..ad307fb8dc2 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1216,6 +1216,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* Do not change the order of the if above and the two below... */
if (os.pdsk == D_DISKLESS &&
ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */
+ /* we probably will start a resync soon.
+ * make sure those things are properly reset. */
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+ drbd_rs_cancel_all(mdev);
drbd_send_state(mdev, ns);
@@ -1386,10 +1393,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
"ASSERT FAILED: disk is %s while going diskless\n",
- mdev->rs_total = 0;
- mdev->rs_failed = 0;
- atomic_set(&mdev->rs_pending_cnt, 0);
if (ns.conn >= C_CONNECTED)
drbd_send_state(mdev, ns);
/* corresponding get_ldev in __drbd_set_state