btrfs-progs: scrub: Introduce offline scrub function

Now, btrfs-progs has a kernel scrub equivalent.
A new option, --offline is added to "btrfs scrub start".

If --offline is given, btrfs scrub will act just like kernel scrub: it
checks every copy of each extent and reports corrupted data and whether
it is recoverable.

The advantages compared to kernel scrub are:
1) No race
   Unlike kernel scrub, which is done in parallel, offline scrub is done
   by a single thread.
   Although it may be slower than the kernel one, it's safer and raises no
   false alerts.

2) Correctness
   Kernel has a known bug (fix submitted) which will recover RAID5/6
   data but screw up P/Q, due to the difficulty of coding in the kernel.
   In btrfs-progs there are no pages and (almost) no memory size limit, so
   we can focus on the scrub and make things easier.

New offline scrub can detect and report P/Q corruption with a
recoverability report, while the kernel will only report data stripe errors.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Su <suy.fnst@cn.fujitsu.com>
Signed-off-by: Gu Jinxiang <gujx@cn.fujitsu.com>
This commit is contained in:
Qu Wenruo 2017-03-28 21:31:48 +08:00 committed by David Sterba
parent f3d259fe62
commit b782d087ae
5 changed files with 204 additions and 4 deletions

View File

@ -78,6 +78,15 @@ set IO priority classdata (see `ionice`(1) manpage)
force starting new scrub even if a scrub is already running,
this can be useful when scrub status file is damaged and reports a running
scrub although it is not, but should not normally be necessary
--offline::::
Do offline scrub.
NOTE: it's experimental and repair is not supported yet.
--progress::::
Show progress status while doing offline scrub. (Default)
NOTE: it's only useful with option --offline.
--no-progress::::
Don't show progress status while doing offline scrub.
NOTE: it's only useful with option --offline.
*status* [-d] <path>|<device>::
Show status of a running scrub for the filesystem identified by 'path' or

View File

@ -36,12 +36,14 @@
#include <signal.h>
#include <stdarg.h>
#include <limits.h>
#include <getopt.h>
#include "ctree.h"
#include "ioctl.h"
#include "utils.h"
#include "volumes.h"
#include "disk-io.h"
#include "task-utils.h"
#include "commands.h"
#include "help.h"
@ -217,6 +219,32 @@ static void add_to_fs_stat(struct btrfs_scrub_progress *p,
_SCRUB_FS_STAT_MIN(ss, finished, fs_stat);
}
/*
 * Progress-printing routine for offline scrub.
 *
 * @p points to a struct task_context.  After starting a 1000 (1s) period
 * on the context's task info, it repeatedly rewrites one status line with
 * a rotating activity character and the cur/all counters read from the
 * context, waiting one period between updates.  The loop has no exit
 * condition of its own.
 */
static void *print_offline_status(void *p)
{
	const char spinner[] = {'.', 'o', 'O', 'o' };
	struct task_context *tctx = p;
	uint32_t tick;

	task_period_start(tctx->info, 1000 /* 1s */);
	for (tick = 0; ; tick++) {
		/* '\r' keeps the output on one line, overwritten each period */
		printf("Doing offline scrub [%c] [%llu/%llu]\r",
		       spinner[tick % 4], tctx->cur, tctx->all);
		fflush(stdout);
		task_period_wait(tctx->info);
	}
	return NULL;
}
/*
 * Companion callback to print_offline_status(): emits a newline so that
 * later output does not land on the '\r'-rewritten progress line, then
 * flushes stdout.
 *
 * @p is unused.  Always returns 0.
 */
static int print_offline_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);
	return 0;
}
static void init_fs_stat(struct scrub_fs_stat *fs_stat)
{
memset(fs_stat, 0, sizeof(*fs_stat));
@ -1100,7 +1128,7 @@ static const char * const cmd_scrub_resume_usage[];
static int scrub_start(int argc, char **argv, int resume)
{
int fdmnt;
int fdmnt = -1;
int prg_fd = -1;
int fdres = -1;
int ret;
@ -1124,10 +1152,14 @@ static int scrub_start(int argc, char **argv, int resume)
int n_start = 0;
int n_skip = 0;
int n_resume = 0;
int offline = 0;
int progress_set = -1;
struct btrfs_ioctl_fs_info_args fi_args;
struct btrfs_ioctl_dev_info_args *di_args = NULL;
struct scrub_progress *sp = NULL;
struct scrub_fs_stat fs_stat;
struct task_context task = {0};
struct btrfs_fs_info *fs_info = NULL;
struct timeval tv;
struct sockaddr_un addr = {
.sun_family = AF_UNIX,
@ -1147,7 +1179,18 @@ static int scrub_start(int argc, char **argv, int resume)
int force = 0;
int nothing_to_resume = 0;
while ((c = getopt(argc, argv, "BdqrRc:n:f")) != -1) {
enum { GETOPT_VAL_OFFLINE = 257,
GETOPT_VAL_PROGRESS,
GETOPT_VAL_NO_PROGRESS};
static const struct option long_options[] = {
{ "offline", no_argument, NULL, GETOPT_VAL_OFFLINE},
{ "progress", no_argument, NULL, GETOPT_VAL_PROGRESS},
{ "no-progress", no_argument, NULL, GETOPT_VAL_NO_PROGRESS},
{ NULL, 0, NULL, 0}
};
while ((c = getopt_long(argc, argv, "BdqrRc:n:f", long_options,
NULL)) != -1) {
switch (c) {
case 'B':
do_background = 0;
@ -1175,6 +1218,15 @@ static int scrub_start(int argc, char **argv, int resume)
case 'f':
force = 1;
break;
case GETOPT_VAL_OFFLINE:
offline = 1;
break;
case GETOPT_VAL_PROGRESS:
progress_set = 1;
break;
case GETOPT_VAL_NO_PROGRESS:
progress_set = 0;
break;
case '?':
default:
usage(resume ? cmd_scrub_resume_usage :
@ -1189,6 +1241,53 @@ static int scrub_start(int argc, char **argv, int resume)
cmd_scrub_start_usage);
}
if (progress_set != -1 && !offline)
warning("Option --no-progress and --progress only works for --offline, ignored.");
if (offline) {
unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
ret = check_mounted(argv[optind]);
if (ret < 0) {
error("could not check mount status: %s", strerror(-ret));
err |= !!ret;
goto out;
} else if (ret) {
error("%s is currently mounted, aborting", argv[optind]);
ret = -EBUSY;
err |= !!ret;
goto out;
}
if (!do_background || do_wait || do_print ||
do_stats_per_dev || do_quiet || print_raw ||
ioprio_class != IOPRIO_CLASS_IDLE || ioprio_classdata ||
force)
warning("Offline scrub doesn't support extra options other than -r");
if (!readonly)
ctree_flags |= OPEN_CTREE_WRITES;
fs_info = open_ctree_fs_info(argv[optind], 0, 0, 0, ctree_flags);
if (!fs_info) {
error("cannot open file system");
ret = -EIO;
err = 1;
goto out;
}
if (progress_set == 1) {
task.info = task_init(print_offline_status,
print_offline_return, &task);
ret = btrfs_scrub(fs_info, &task, !readonly);
task_deinit(task.info);
} else {
ret = btrfs_scrub(fs_info, NULL, !readonly);
}
goto out;
}
spc.progress = NULL;
if (do_quiet && do_print)
do_print = 0;
@ -1545,7 +1644,10 @@ out:
if (sock_path[0])
unlink(sock_path);
}
close_file_or_dir(fdmnt, dirstream);
if (fdmnt >= 0)
close_file_or_dir(fdmnt, dirstream);
if (fs_info)
close_ctree_fs_info(fs_info);
if (err)
return 1;
@ -1563,9 +1665,10 @@ out:
}
static const char * const cmd_scrub_start_usage[] = {
"btrfs scrub start [-BdqrRf] [-c ioprio_class -n ioprio_classdata] <path>|<device>",
"btrfs scrub start [-BdqrRf] [-c ioprio_class -n ioprio_classdata] [--offline] [--progress][no-progress] <path>|<device>",
"Start a new scrub. If a scrub is already running, the new one fails.",
"",
"Online (kernel) scrub options:",
"-B do not background",
"-d stats per device (-B only)",
"-q be quiet",
@ -1575,6 +1678,11 @@ static const char * const cmd_scrub_start_usage[] = {
"-n set ioprio classdata (see ionice(1) manpage)",
"-f force starting new scrub even if a scrub is already running",
" this is useful when scrub stats record file is damaged",
"",
"Offline scrub options:",
"--offline start an offline scrub, not support other options",
"--progress show progress status (default), only work with option --offline",
"--no-progress do not show progress status, only work only with option --offline",
NULL
};

View File

@ -2768,4 +2768,10 @@ int btrfs_read_file(struct btrfs_root *root, u64 ino, u64 start, int len,
int btrfs_read_data_csums(struct btrfs_fs_info *fs_info, u64 start, u64 len,
void *csum_ret, unsigned long *bitmap_ret);
/* scrub.c */
/* Forward declaration only; the prototype below needs just a pointer. */
struct task_context;
/*
 * Offline scrub of the filesystem described by @fs_info.
 *
 * @ctx may be NULL, in which case no progress task is driven.
 * @write is forwarded to the per-block-group scrub routine.
 *
 * Returns 0 if the scrub counted no errors and a non-zero value otherwise
 * (e.g. -ENOENT when no block group is found).
 */
int btrfs_scrub(struct btrfs_fs_info *fs_info, struct task_context *ctx,
int write);
#endif

71
scrub.c
View File

@ -19,6 +19,7 @@
#include "disk-io.h"
#include "utils.h"
#include "kernel-lib/bitops.h"
#include "task-utils.h"
#include "kernel-lib/raid56.h"
/*
@ -1290,3 +1291,73 @@ out:
btrfs_free_path(path);
return ret;
}
/*
 * Scrub every block group of the filesystem and print a summary.
 *
 * @fs_info: opened filesystem, must not be NULL
 * @task:    optional progress context.  When non-NULL, ->all is set to the
 *           number of block groups, ->cur holds the 1-based index of the
 *           block group currently being scrubbed (it never exceeds ->all),
 *           and ->info is started before and stopped after the scrub.
 * @write:   forwarded unchanged to scrub_one_block_group()
 *
 * Return:
 *   0        no read/verify/csum/uncorrectable error was counted
 *   1        at least one such error was counted
 *   -ENOENT  no block group was found
 *   <0       the error from scrub_one_block_group() that aborted the scrub
 *            (any negative value other than -EIO); the summary printed in
 *            that case only covers the block groups processed so far
 */
int btrfs_scrub(struct btrfs_fs_info *fs_info, struct task_context *task,
		int write)
{
	u64 bg_nr = 0;
	struct btrfs_block_group_cache *bg_cache;
	struct btrfs_scrub_progress scrub_ctx = {0};
	int fatal = 0;
	int ret;

	ASSERT(fs_info);

	bg_cache = btrfs_lookup_first_block_group(fs_info, 0);
	if (!bg_cache) {
		error("no block group is found");
		return -ENOENT;
	}

	if (task) {
		/* get block group numbers for progress */
		while (bg_cache) {
			++bg_nr;
			bg_cache = btrfs_lookup_first_block_group(fs_info,
				bg_cache->key.objectid + bg_cache->key.offset);
		}
		task->all = bg_nr;
		task->cur = 1;
		task_start(task->info);

		/* Rewind to the first block group for the real pass. */
		bg_cache = btrfs_lookup_first_block_group(fs_info, 0);
	}

	while (1) {
		ret = scrub_one_block_group(fs_info, &scrub_ctx, bg_cache,
					    write);
		/*
		 * -EIO does not stop the scrub (presumably it is already
		 * reflected in scrub_ctx counters -- confirm); any other
		 * negative value aborts and must be reported to the caller
		 * instead of being overwritten by the counter check below.
		 */
		if (ret < 0 && ret != -EIO) {
			fatal = ret;
			break;
		}

		bg_cache = btrfs_lookup_first_block_group(fs_info,
				bg_cache->key.objectid + bg_cache->key.offset);
		if (!bg_cache)
			break;

		/* Advance only when there is a next group: cur <= all. */
		if (task)
			task->cur++;
	}

	if (task)
		task_stop(task->info);

	if (fatal)
		error("offline scrub aborted, error %d", fatal);

	printf("Scrub result:\n");
	printf("Tree bytes scrubbed: %llu\n", scrub_ctx.tree_bytes_scrubbed);
	printf("Tree extents scrubbed: %llu\n", scrub_ctx.tree_extents_scrubbed);
	printf("Data bytes scrubbed: %llu\n", scrub_ctx.data_bytes_scrubbed);
	printf("Data extents scrubbed: %llu\n", scrub_ctx.data_extents_scrubbed);
	printf("Data bytes without csum: %llu\n", scrub_ctx.csum_discards *
			fs_info->sectorsize);
	printf("Read error: %llu\n", scrub_ctx.read_errors);
	printf("Verify error: %llu\n", scrub_ctx.verify_errors);
	printf("Csum error: %llu\n", scrub_ctx.csum_errors);

	if (fatal)
		return fatal;
	if (scrub_ctx.csum_errors || scrub_ctx.read_errors ||
	    scrub_ctx.uncorrectable_errors || scrub_ctx.verify_errors)
		return 1;
	return 0;
}

View File

@ -178,4 +178,10 @@ u64 rand_u64(void);
unsigned int rand_range(unsigned int upper);
void init_rand_seed(u64 seed);
/*
 * Progress state handed to a status-printing task; the printer shows it
 * as "[cur/all]".
 */
struct task_context {
/* Current position, shown as the first number of the progress display. */
u64 cur;
/* Total amount of work, shown as the second number of the display. */
u64 all;
/* Task handle passed to task_start()/task_stop()/task_period_*(). */
struct task_info *info;
};
#endif