From cb7b91dc02b64cb15f8d21e830a698bd4173b35a Mon Sep 17 00:00:00 2001
From: Jason Dillaman <dillaman@redhat.com>
Date: Tue, 7 Apr 2020 22:12:19 -0400
Subject: [PATCH] rbd-mirror: unlink from remote snapshot if required

If a previous remote snapshot was synced but the unlink failed,
ensure we retry the unlink so that the remote can cleanup the unused
snapshot.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
---
 .../snapshot/test_mock_Replayer.cc            | 118 +++++++++++++++++-
 .../image_replayer/snapshot/Replayer.cc       |  54 +++++---
 .../image_replayer/snapshot/Replayer.h        |  48 +++----
 3 files changed, 175 insertions(+), 45 deletions(-)

diff --git a/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc b/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc
index 17407e0e466..02cbc768460 100644
--- a/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc
+++ b/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc
@@ -819,7 +819,20 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, SyncSnapshot) {
          4, true, 0, {}},
        0, {}, 0, 0, {}}},
     }, 0);
-  expect_is_refresh_required(mock_remote_image_ctx, false);
+  expect_is_refresh_required(mock_remote_image_ctx, true);
+  expect_refresh(
+    mock_remote_image_ctx, {
+      {2U, librbd::SnapInfo{"snap2", cls::rbd::UserSnapshotNamespace{},
+       0, {}, 0, 0, {}}},
+      {3U, librbd::SnapInfo{"snap3", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {""},
+         "", 0U, true, 0, {}},
+       0, {}, 0, 0, {}}},
+      {4U, librbd::SnapInfo{"snap4", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+         "", 0U, true, 0, {}},
+       0, {}, 0, 0, {}}}
+    }, 0);
   expect_prune_non_primary_snapshot(mock_local_image_ctx, 11, 0);
 
   // idle
@@ -2079,7 +2092,19 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, RemoteFailover) {
          {{1, 11}, {2, 12}, {3, CEPH_NOSNAP}}},
        0, {}, 0, 0, {}}},
     }, 0);
-  expect_is_refresh_required(mock_remote_image_ctx, false);
+  expect_is_refresh_required(mock_remote_image_ctx, true);
+  expect_refresh(
+    mock_remote_image_ctx, {
+      {1U, librbd::SnapInfo{"snap1", cls::rbd::UserSnapshotNamespace{},
+         0, {}, 0, 0, {}}},
+      {2U, librbd::SnapInfo{"snap2", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED,
+         {"remote mirror peer uuid"}, "local mirror uuid", 12U, true, 0, {}},
+       0, {}, 0, 0, {}}},
+      {3U, librbd::SnapInfo{"snap3", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {}, "", 0U, true, 0, {}},
+       0, {}, 0, 0, {}}}
+    }, 0);
 
   // wake-up replayer
   update_watch_ctx->handle_notify();
@@ -2091,6 +2116,95 @@ TEST_F(TestMockImageReplayerSnapshotReplayer, RemoteFailover) {
                                         mock_remote_image_ctx));
 }
 
+TEST_F(TestMockImageReplayerSnapshotReplayer, UnlinkRemoteSnapshot) {
+  librbd::MockTestImageCtx mock_local_image_ctx{*m_local_image_ctx};
+  librbd::MockTestImageCtx mock_remote_image_ctx{*m_remote_image_ctx};
+
+  // it should attempt to unlink from remote snap1 since we don't need it
+  // anymore
+  mock_local_image_ctx.snap_info = {
+    {14U, librbd::SnapInfo{"snap4", cls::rbd::MirrorSnapshotNamespace{
+       cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY, {}, "remote mirror uuid",
+       4, true, 0, {}},
+     0, {}, 0, 0, {}}}};
+  mock_remote_image_ctx.snap_info = {
+    {1U, librbd::SnapInfo{"snap1", cls::rbd::MirrorSnapshotNamespace{
+       cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+       "", 0U, true, 0, {}},
+     0, {}, 0, 0, {}}},
+    {4U, librbd::SnapInfo{"snap4", cls::rbd::MirrorSnapshotNamespace{
+       cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+       "", 0U, true, 0, {}},
+     0, {}, 0, 0, {}}}};
+
+  MockThreads mock_threads(m_threads);
+  expect_work_queue_repeatedly(mock_threads);
+
+  MockReplayerListener mock_replayer_listener;
+  expect_notification(mock_threads, mock_replayer_listener);
+
+  InSequence seq;
+
+  MockInstanceWatcher mock_instance_watcher;
+  MockImageMeta mock_image_meta;
+  MockStateBuilder mock_state_builder(mock_local_image_ctx,
+                                      mock_remote_image_ctx,
+                                      mock_image_meta);
+  MockReplayer mock_replayer{&mock_threads, &mock_instance_watcher,
+                             "local mirror uuid", &m_pool_meta_cache,
+                             &mock_state_builder, &mock_replayer_listener};
+  m_pool_meta_cache.set_remote_pool_meta(
+    m_remote_io_ctx.get_id(),
+    {"remote mirror uuid", "remote mirror peer uuid"});
+
+  librbd::UpdateWatchCtx* update_watch_ctx = nullptr;
+
+  // init
+  expect_register_update_watcher(mock_local_image_ctx, &update_watch_ctx, 123,
+                                 0);
+  expect_register_update_watcher(mock_remote_image_ctx, &update_watch_ctx, 234,
+                                 0);
+
+  // unlink snap1
+  expect_load_image_meta(mock_image_meta, false, 0);
+  expect_is_refresh_required(mock_local_image_ctx, false);
+  expect_is_refresh_required(mock_remote_image_ctx, false);
+  MockUnlinkPeerRequest mock_unlink_peer_request;
+  expect_unlink_peer(mock_unlink_peer_request, 1, "remote mirror peer uuid",
+                     0);
+
+  // idle
+  expect_load_image_meta(mock_image_meta, false, 0);
+  expect_is_refresh_required(mock_local_image_ctx, false);
+  expect_is_refresh_required(mock_remote_image_ctx, true);
+  expect_refresh(
+    mock_remote_image_ctx, {
+      {2U, librbd::SnapInfo{"snap2", cls::rbd::UserSnapshotNamespace{},
+       0, {}, 0, 0, {}}},
+      {3U, librbd::SnapInfo{"snap3", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {""},
+         "", 0U, true, 0, {}},
+       0, {}, 0, 0, {}}},
+      {4U, librbd::SnapInfo{"snap4", cls::rbd::MirrorSnapshotNamespace{
+         cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {"remote mirror peer uuid"},
+         "", 0U, true, 0, {}},
+       0, {}, 0, 0, {}}}
+    }, 0);
+
+  // fire init
+  C_SaferCond init_ctx;
+  mock_replayer.init(&init_ctx);
+  ASSERT_EQ(0, init_ctx.wait());
+
+  // wait for sync to complete
+  ASSERT_EQ(0, wait_for_notification(2));
+
+  // shut down
+  ASSERT_EQ(0, shut_down_entry_replayer(mock_replayer, mock_threads,
+                                        mock_local_image_ctx,
+                                        mock_remote_image_ctx));
+}
+
 } // namespace snapshot
 } // namespace image_replayer
 } // namespace mirror
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
index a28cb82e3bf..7980f525491 100644
--- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
@@ -532,6 +532,7 @@ void Replayer<I>::scan_remote_mirror_snapshots(
 
   m_pending_snapshots = 0;
 
+  std::set<uint64_t> unlink_snap_ids;
   bool split_brain = false;
   bool remote_demoted = false;
   auto remote_image_ctx = m_state_builder->remote_image_ctx;
@@ -547,13 +548,17 @@ void Replayer<I>::scan_remote_mirror_snapshots(
 
     dout(15) << "remote mirror snapshot: id=" << snap_info_it->first << ", "
              << "mirror_ns=" << *mirror_ns << dendl;
+    remote_demoted = mirror_ns->is_demoted();
     if (!mirror_ns->is_primary() && !mirror_ns->is_non_primary()) {
       derr << "unknown remote mirror snapshot state" << dendl;
       handle_replay_complete(locker, -EINVAL,
                              "invalid remote mirror snapshot state");
       return;
-    } else {
-      remote_demoted = mirror_ns->is_demoted();
+    } else if (mirror_ns->mirror_peer_uuids.count(m_remote_mirror_peer_uuid) ==
+                 0) {
+      dout(15) << "skipping remote snapshot due to missing mirror peer"
+               << dendl;
+      continue;
     }
 
     auto remote_snap_id = snap_info_it->first;
@@ -564,10 +569,12 @@ void Replayer<I>::scan_remote_mirror_snapshots(
         ceph_assert(m_local_mirror_snap_ns.primary_mirror_uuid ==
                       m_state_builder->remote_mirror_uuid);
 
+        unlink_snap_ids.insert(remote_snap_id);
         if (m_local_mirror_snap_ns.complete &&
             m_local_mirror_snap_ns.primary_snap_id >= remote_snap_id) {
           // skip past completed remote snapshot
           m_remote_snap_id_start = remote_snap_id;
+          m_remote_mirror_snap_ns = *mirror_ns;
           dout(15) << "skipping synced remote snapshot " << remote_snap_id
                    << dendl;
           continue;
@@ -577,6 +584,7 @@ void Replayer<I>::scan_remote_mirror_snapshots(
           dout(15) << "skipping synced remote snapshot " << remote_snap_id
                    << " while search for in-progress sync" << dendl;
           m_remote_snap_id_start = remote_snap_id;
+          m_remote_mirror_snap_ns = *mirror_ns;
           continue;
         }
       } else if (m_local_mirror_snap_ns.state ==
@@ -604,17 +612,9 @@ void Replayer<I>::scan_remote_mirror_snapshots(
         // should not have been able to reach this
         ceph_assert(false);
       }
-    }
-
-    // find first snapshot where were are listed as a peer
-    if (!mirror_ns->is_primary()) {
+    } else if (!mirror_ns->is_primary()) {
       dout(15) << "skipping non-primary remote snapshot" << dendl;
       continue;
-    } else if (mirror_ns->mirror_peer_uuids.count(m_remote_mirror_peer_uuid) ==
-                 0) {
-      dout(15) << "skipping remote snapshot due to missing mirror peer"
-               << dendl;
-      continue;
     }
 
     // found candidate snapshot to sync
@@ -623,11 +623,25 @@ void Replayer<I>::scan_remote_mirror_snapshots(
       continue;
     }
 
+    // first primary snapshot where were are listed as a peer
     m_remote_snap_id_end = remote_snap_id;
     m_remote_mirror_snap_ns = *mirror_ns;
   }
   image_locker.unlock();
 
+  unlink_snap_ids.erase(m_remote_snap_id_start);
+  unlink_snap_ids.erase(m_remote_snap_id_end);
+  if (!unlink_snap_ids.empty()) {
+    locker->unlock();
+
+    // retry the unlinking process for a remote snapshot that we do not
+    // need anymore
+    auto remote_snap_id = *unlink_snap_ids.begin();
+    dout(10) << "unlinking from remote snapshot " << remote_snap_id << dendl;
+    unlink_peer(remote_snap_id);
+    return;
+  }
+
   if (m_remote_snap_id_end != CEPH_NOSNAP) {
     dout(10) << "found remote mirror snapshot: "
              << "remote_snap_id_start=" << m_remote_snap_id_start << ", "
@@ -1143,24 +1157,24 @@ void Replayer<I>::handle_notify_image_update(int r) {
     derr << "failed to notify local image update: " << cpp_strerror(r) << dendl;
   }
 
-  unlink_peer();
+  unlink_peer(m_remote_snap_id_start);
 }
 
 template <typename I>
-void Replayer<I>::unlink_peer() {
-  if (m_remote_snap_id_start == 0) {
+void Replayer<I>::unlink_peer(uint64_t remote_snap_id) {
+  if (remote_snap_id == 0) {
     finish_sync();
     return;
   }
 
   // local snapshot fully synced -- we no longer depend on the sync
   // start snapshot in the remote image
-  dout(10) << "remote_snap_id=" << m_remote_snap_id_start << dendl;
+  dout(10) << "remote_snap_id=" << remote_snap_id << dendl;
 
   auto ctx = create_context_callback<
     Replayer<I>, &Replayer<I>::handle_unlink_peer>(this);
   auto req = librbd::mirror::snapshot::UnlinkPeerRequest<I>::create(
-    m_state_builder->remote_image_ctx, m_remote_snap_id_start,
+    m_state_builder->remote_image_ctx, remote_snap_id,
     m_remote_mirror_peer_uuid, ctx);
   req->send();
 }
@@ -1187,9 +1201,11 @@ void Replayer<I>::finish_sync() {
     std::unique_lock locker{m_lock};
     notify_status_updated();
 
-    m_sync_in_progress = false;
-    m_instance_watcher->notify_sync_complete(
-      m_state_builder->local_image_ctx->id);
+    if (m_sync_in_progress) {
+      m_sync_in_progress = false;
+      m_instance_watcher->notify_sync_complete(
+        m_state_builder->local_image_ctx->id);
+    }
   }
 
   if (is_replay_interrupted()) {
diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h
index 1b50a3d1489..dda101b0c2a 100644
--- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h
+++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h
@@ -123,37 +123,37 @@ private:
    *    |                                             | |
    *    | (new snapshot)                              | |
    *    |\--------------> COPY_SNAPSHOTS              | |
-   *    |                     |                       | |
-   *    |                     v                       | |
+   *    |                       |                     | |
+   *    |                       v                     | |
    *    |                 GET_REMOTE_IMAGE_STATE      | |
-   *    |                     |                       | |
-   *    |                     v                       | |
+   *    |                       |                     | |
+   *    |                       v                     | |
    *    |                 CREATE_NON_PRIMARY_SNAPSHOT | |
-   *    |                     |                       | |
-   *    |                     |/----------------------/ |
-   *    |                     |                         |
-   *    |                     v                         |
+   *    |                       |                     | |
+   *    |                       |/--------------------/ |
+   *    |                       |                       |
+   *    |                       v                       |
    *    |                 REQUEST_SYNC                  |
-   *    |                     |                         |
-   *    |                     v                         |
+   *    |                       |                       |
+   *    |                       v                       |
    *    |                 COPY_IMAGE                    |
-   *    |                     |                         |
-   *    |                     v                         |
+   *    |                       |                       |
+   *    |                       v                       |
    *    |                 APPLY_IMAGE_STATE             |
-   *    |                     |                         |
-   *    |                     v                         |
+   *    |                       |                       |
+   *    |                       v                       |
    *    |                 UPDATE_NON_PRIMARY_SNAPSHOT   |
-   *    |                     |                         |
-   *    |                     v                         |
+   *    |                       |                       |
+   *    |                       v                       |
    *    |                 NOTIFY_IMAGE_UPDATE           |
-   *    |                     |                         |
-   *    |                     v                         |
-   *    |                 UNLINK_PEER                   |
-   *    |                     |                         |
-   *    |                     v                         |
+   *    |                       |                       |
+   *    | (interrupted unlink)  v                       |
+   *    |\--------------> UNLINK_PEER                   |
+   *    |                       |                       |
+   *    |                       v                       |
    *    |                 NOTIFY_LISTENER               |
-   *    |                     |                         |
-   *    |                     \------------------------/|
+   *    |                       |                       |
+   *    |                       \----------------------/|
    *    |                                               |
    *    | (remote demoted)                              |
    *    \---------------> NOTIFY_LISTENER               |
@@ -288,7 +288,7 @@ private:
   void notify_image_update();
   void handle_notify_image_update(int r);
 
-  void unlink_peer();
+  void unlink_peer(uint64_t remote_snap_id);
   void handle_unlink_peer(int r);
 
   void finish_sync();