From d0c8d8cfe029aeeadb88ab691a1f3223c914a91f Mon Sep 17 00:00:00 2001 From: Wido den Hollander Date: Wed, 7 Dec 2016 20:43:44 +0100 Subject: [PATCH] doc: Update CephFS disaster recovery documentation Better documentation about spawning multiple workers to speed up the recovery process. Signed-off-by: Wido den Hollander --- doc/cephfs/disaster-recovery.rst | 32 ++++++++++++++++++++++---------- src/tools/cephfs/DataScan.cc | 6 ++++-- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/doc/cephfs/disaster-recovery.rst b/doc/cephfs/disaster-recovery.rst index 78695e17c54..c40e0b411a8 100644 --- a/doc/cephfs/disaster-recovery.rst +++ b/doc/cephfs/disaster-recovery.rst @@ -140,25 +140,37 @@ it into the metadata pool. cephfs-data-scan scan_extents cephfs-data-scan scan_inodes -This command may take a very long time if there are many -files or very large files in the data pool. To accelerate -the process, run multiple instances of the tool. Decide on -a number of workers, and pass each worker a number within -the range 0-(N_workers - 1), like so: +This command may take a *very long* time if there are many +files or very large files in the data pool. + +To accelerate the process, run multiple instances of the tool. + +Decide on a number of workers, and pass each worker a number within +the range 0-(worker_m - 1). 
+ +The example below shows how to run 4 workers simultaneously: :: # Worker 0 - cephfs-data-scan scan_extents --worker_n 0 --worker_m 1 + cephfs-data-scan scan_extents --worker_n 0 --worker_m 4 # Worker 1 - cephfs-data-scan scan_extents --worker_n 1 --worker_m 1 1 1 + cephfs-data-scan scan_extents --worker_n 1 --worker_m 4 + # Worker 2 + cephfs-data-scan scan_extents --worker_n 2 --worker_m 4 + # Worker 3 + cephfs-data-scan scan_extents --worker_n 3 --worker_m 4 # Worker 0 - cephfs-data-scan scan_inodes --worker_n 0 --worker_m 1 + cephfs-data-scan scan_inodes --worker_n 0 --worker_m 4 # Worker 1 - cephfs-data-scan scan_inodes --worker_n 1 --worker_m 1 + cephfs-data-scan scan_inodes --worker_n 1 --worker_m 4 + # Worker 2 + cephfs-data-scan scan_inodes --worker_n 2 --worker_m 4 + # Worker 3 + cephfs-data-scan scan_inodes --worker_n 3 --worker_m 4 -It is important to ensure that all workers have completed the +It is **important** to ensure that all workers have completed the scan_extents phase before any workers enter the scan_inodes phase. 
After completing the metadata recovery, you may want to run cleanup diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index 2bd70406715..a1d03c4cde0 100644 --- a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -33,14 +33,16 @@ void DataScan::usage() { std::cout << "Usage: \n" << " cephfs-data-scan init [--force-init]\n" - << " cephfs-data-scan scan_extents [--force-pool] \n" - << " cephfs-data-scan scan_inodes [--force-pool] [--force-corrupt] \n" + << " cephfs-data-scan scan_extents [--force-pool] [--worker_n N --worker_m M] \n" + << " cephfs-data-scan scan_inodes [--force-pool] [--force-corrupt] [--worker_n N --worker_m M] \n" << " cephfs-data-scan pg_files [...]\n" << " cephfs-data-scan scan_links\n" << "\n" << " --force-corrupt: overrite apparently corrupt structures\n" << " --force-init: write root inodes even if they exist\n" << " --force-pool: use data pool even if it is not in FSMap\n" + << " --worker_m: Maximum number of workers\n" + << " --worker_n: Worker number, range 0-(worker_m-1)\n" << "\n" << " cephfs-data-scan scan_frags [--force-corrupt]\n" << " cephfs-data-scan cleanup \n"